From 41f6c3a7cb8957750c6d6e5ab631a1eb84b6a0a8 Mon Sep 17 00:00:00 2001 From: Avishay Cohen Date: Mon, 2 Jun 2025 11:10:27 +0300 Subject: [PATCH 1/2] added windows configs to ignore --- .gitignore | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 7089a4c..43b1809 100644 --- a/.gitignore +++ b/.gitignore @@ -132,4 +132,8 @@ dmypy.json .vscode # Pycharm -.idea \ No newline at end of file +.idea + +# Windows python venv +/Scripts/ +pyvenv.cfg \ No newline at end of file From e14a0f02f7413c0f96237b642dd247d4090c14ca Mon Sep 17 00:00:00 2001 From: Avishay Cohen Date: Mon, 2 Jun 2025 11:12:07 +0300 Subject: [PATCH 2/2] add raise and logging of errors during parsing. also added tests to see correct errors are raised --- src/mpd_parser/exceptions.py | 3 ++ src/mpd_parser/parser.py | 15 ++++++++- tests/test_manifets.py | 59 +++++++++++++++++++++++++++++++++++- 3 files changed, 75 insertions(+), 2 deletions(-) diff --git a/src/mpd_parser/exceptions.py b/src/mpd_parser/exceptions.py index ba64860..b64936c 100644 --- a/src/mpd_parser/exceptions.py +++ b/src/mpd_parser/exceptions.py @@ -7,6 +7,9 @@ class UnicodeDeclaredError(Exception): """ Raised when the XML has an encoding declaration in it's manifest and the parser did not remove it """ description = "xml has encoding declaration, lxml cannot process it" +class UnknownValueError(Exception): + """ Raised when the XML parsing fails on unexpected issue, check error for more information """ + description = "lxml failed to parse manifest, verify the input" class UnknownElementTreeParseError(Exception): """ Raised after a etree parse operation fails on an unexpected error """ diff --git a/src/mpd_parser/parser.py b/src/mpd_parser/parser.py index 0702dd9..c87e3b4 100644 --- a/src/mpd_parser/parser.py +++ b/src/mpd_parser/parser.py @@ -2,15 +2,19 @@ Main module of the package, Parser class """ +import logging from re import Match, sub from urllib.request import urlopen from lxml import etree -from mpd_parser.exceptions import UnicodeDeclaredError, UnknownElementTreeParseError +from mpd_parser.exceptions import UnicodeDeclaredError, UnknownElementTreeParseError, UnknownValueError from mpd_parser.models.composite_tags import MPD +# module level logger, application will configure formatting and handlers +logger = logging.getLogger(__name__) +# Regular expression to match encoding declaration in XML ENCODING_PATTERN = r"<\?.*?\s(encoding=\"\S*\").*\?>" @@ -48,7 +52,10 @@ def cut_and_burn(match: Match) -> str: except ValueError as err: if "Unicode" in err.args[0]: raise UnicodeDeclaredError() from err + logger.exception("Failed to parse manifest string") + raise UnknownValueError() from err except Exception as err: + logger.exception("Failed to parse manifest string") raise UnknownElementTreeParseError() from err if encoding: return MPD(root, encoding=encoding[0].groups()[0]) @@ -69,7 +76,10 @@ def from_file(cls, manifest_file_name: str) -> MPD: except ValueError as err: if "Unicode" in err.args[0]: raise UnicodeDeclaredError() from err + logger.exception("Failed to parse manifest file %s", manifest_file_name) + raise UnknownValueError() from err except Exception as err: + logger.exception("Failed to parse manifest file %s", manifest_file_name) raise UnknownElementTreeParseError() from err return MPD(tree.getroot()) @@ -89,7 +99,10 @@ def from_url(cls, url: str) -> MPD: except ValueError as err: if "Unicode" in err.args[0]: raise UnicodeDeclaredError() from err + logger.exception("Failed to parse manifest from URL %s", url) + raise UnknownValueError() from err except Exception as err: + logger.exception("Failed to parse manifest from URL %s", url) raise UnknownElementTreeParseError() from err return MPD(tree.getroot()) diff --git a/tests/test_manifets.py b/tests/test_manifets.py index ec45853..a98a288 100644 --- a/tests/test_manifets.py +++ b/tests/test_manifets.py @@ -1,13 +1,24 @@ """ Test the parsing of full manifests """ +import io import os -from pytest import mark +from pytest import mark, raises +from mpd_parser.exceptions import UnicodeDeclaredError, UnknownElementTreeParseError, UnknownValueError from mpd_parser.parser import Parser from tests.conftest import touch_attributes, MANIFESTS_DIR +class DummyFile(io.BytesIO): + def __enter__(self): + return self + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + +def dummy_urlopen(url): + return DummyFile(b"") + @mark.parametrize("input_file", [ "./../manifests/bigBuckBunny-onDemend.mpd", @@ -104,3 +115,49 @@ def test_to_string(input_file): 0].range == \ transformed_mpd.periods[0].adaptation_sets[0].representations[0].segment_bases[0].initializations[ 0].range + +@mark.parametrize( + "exception,patch_func", + [ + (UnicodeDeclaredError, lambda: ValueError("Unicode something")), + (UnknownValueError, lambda: ValueError("Some other value error")), + (UnknownElementTreeParseError, lambda: RuntimeError("Some runtime error")), + ] +) +def test_from_string_error_handling(monkeypatch, exception, patch_func): + def fake_parse(*args, **kwargs): + raise patch_func() + monkeypatch.setattr("mpd_parser.parser.etree.fromstring", fake_parse) + with raises(exception): + Parser.from_string("") + +@mark.parametrize( + "exception,patch_func", + [ + (UnicodeDeclaredError, lambda: ValueError("Unicode something")), + (UnknownValueError, lambda: ValueError("Some other value error")), + (UnknownElementTreeParseError, lambda: RuntimeError("Some runtime error")), + ] +) +def test_from_file_error_handling(monkeypatch, exception, patch_func): + def fake_parse(*args, **kwargs): + raise patch_func() + monkeypatch.setattr("mpd_parser.parser.etree.parse", fake_parse) + with raises(exception): + Parser.from_file("dummy_file.mpd") + +@mark.parametrize( + "exception,patch_func", + [ + (UnicodeDeclaredError, lambda: ValueError("Unicode something")), + (UnknownValueError, lambda: ValueError("Some other value error")), + (UnknownElementTreeParseError, lambda: RuntimeError("Some runtime error")), + ] +) +def test_from_url_error_handling(monkeypatch, exception, patch_func): + def fake_parse(*args, **kwargs): + raise patch_func() + monkeypatch.setattr("mpd_parser.parser.etree.parse", fake_parse) + monkeypatch.setattr("mpd_parser.parser.urlopen", dummy_urlopen) + with raises(exception): + Parser.from_url("http://dummy.url/manifest.mpd") \ No newline at end of file