From b30f4636a2acc36e69de79b09e77fd91d02f2017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKevin?= Date: Thu, 2 Oct 2025 18:13:14 -0600 Subject: [PATCH 1/2] fix(clean): exclude text inside elements with class "star-pagination" add test case --- eyecite/clean.py | 4 +++- tests/test_FindTest.py | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/eyecite/clean.py b/eyecite/clean.py index d0fc1c5..11c6237 100644 --- a/eyecite/clean.py +++ b/eyecite/clean.py @@ -51,7 +51,9 @@ def html(html_content: str) -> str: parent::link | parent::head | parent::page-number | - parent::script)]""" + parent::script | + parent::*[@class="star-pagination"] + )]""" ) return " ".join(text) diff --git a/tests/test_FindTest.py b/tests/test_FindTest.py index 13bfe01..6c05d00 100644 --- a/tests/test_FindTest.py +++ b/tests/test_FindTest.py @@ -828,7 +828,14 @@ def test_find_citations(self): # Fix for index error when searching for case name ("

State v. Luna-Benitez (S53965). Alternative writ issued, dismissed, 342 Or 255

", [case_citation(volume="342", reporter="Or", page="255")], - {'clean_steps': ['html', 'inline_whitespace']}) + {'clean_steps': ['html', 'inline_whitespace']}), + # Test remove text with star-pagination class + ("

The somewhat similar cases of Crane v. Hyde Park, 135 *355 Mass. 147, and Mahoning County v. Young, 16 U.S. App. 253, also cited by the defendant, likewise turned upon a question of forfeiture for breach of a condition subsequent in a deed to a municipal corporation.

", + [case_citation(volume="135", reporter="Mass.", page="147", + metadata={"plaintiff": "Crane", + "defendant": "Hyde Park"} + )], + {'clean_steps': ['html', 'inline_whitespace']}) ) # fmt: on From 10c42daeb3ed12425e7e26d2808a6076f0da87bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKevin?= Date: Thu, 2 Oct 2025 18:22:28 -0600 Subject: [PATCH 2/2] fix(clean): update CHANGES.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 8e4ab05..800c66f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -12,6 +12,7 @@ Changes: Fixes: - Modifies rendering of AhocorasickTokenizer parameter in API docs II +- Removed star-pagination markers from extracted text #293 ## Current