diff --git a/CHANGES.md b/CHANGES.md index 8e4ab05..800c66f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -12,6 +12,7 @@ Changes: Fixes: - Modifies rendering of AhocorasickTokenizer parameter in API docs II +- Removed star-pagination markers from extracted text #293 ## Current diff --git a/eyecite/clean.py b/eyecite/clean.py index d0fc1c5..11c6237 100644 --- a/eyecite/clean.py +++ b/eyecite/clean.py @@ -51,7 +51,9 @@ def html(html_content: str) -> str: parent::link | parent::head | parent::page-number | - parent::script)]""" + parent::script | + parent::*[@class="star-pagination"] + )]""" ) return " ".join(text) diff --git a/tests/test_FindTest.py b/tests/test_FindTest.py index 13bfe01..6c05d00 100644 --- a/tests/test_FindTest.py +++ b/tests/test_FindTest.py @@ -828,7 +828,14 @@ def test_find_citations(self): # Fix for index error when searching for case name ("
State v. Luna-Benitez (S53965). Alternative writ issued, dismissed, 342 Or 255
", [case_citation(volume="342", reporter="Or", page="255")], - {'clean_steps': ['html', 'inline_whitespace']}) + {'clean_steps': ['html', 'inline_whitespace']}), + # Test that text inside elements with the star-pagination class is removed + ("The somewhat similar cases of Crane v. Hyde Park, 135 *355 Mass. 147, and Mahoning County v. Young, 16 U.S. App. 253, also cited by the defendant, likewise turned upon a question of forfeiture for breach of a condition subsequent in a deed to a municipal corporation.
", + [case_citation(volume="135", reporter="Mass.", page="147", + metadata={"plaintiff": "Crane", + "defendant": "Hyde Park"} + )], + {'clean_steps': ['html', 'inline_whitespace']}) ) # fmt: on