diff --git a/CHANGES.md b/CHANGES.md index 8e4ab05..f17f381 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,7 +8,7 @@ Features: - Changes: -- +- Handle sequential citations (in close proximity) better Fixes: - Modifies rendering of AhocorasickTokenizer parameter in API docs II diff --git a/eyecite/helpers.py b/eyecite/helpers.py index 84213cb..e2be175 100644 --- a/eyecite/helpers.py +++ b/eyecite/helpers.py @@ -218,6 +218,7 @@ def _scan_for_case_boundaries( word_str.endswith(";") or word_str.endswith("”") or word_str.endswith('"') + or (word_str.endswith("),") and state["v_token"] is not None) ): state["start_index"] = index + 2 state["candidate_case_name"] = _extract_text( @@ -227,7 +228,10 @@ def _scan_for_case_boundaries( # Handle year before citation if re.match(r"\(\d{4}\)", word_str): - state["title_starting_index"] = index - 1 + # Don't override title_starting_index if we already found a v. token + # and have a case name set, as this would corrupt the extraction + if state["v_token"] is None or not state["candidate_case_name"]: + state["title_starting_index"] = index - 1 state["pre_cite_year"] = word_str[1:5] continue @@ -264,11 +268,13 @@ def _scan_for_case_boundaries( # Handle "v" token - store it but don't break yet if _is_v_token(word): - state["v_token"] = word - state["start_index"] = index - 2 - state["candidate_case_name"] = _extract_text( - words, state["start_index"], state["title_starting_index"] - ) + # Don't overwrite a case name we already found from a more recent v. token + if state["v_token"] is None: + state["v_token"] = word + state["start_index"] = index - 2 + state["candidate_case_name"] = _extract_text( + words, state["start_index"], state["title_starting_index"] + ) continue # Break on likely new sentence after "v" token diff --git a/tests/test_FindTest.py b/tests/test_FindTest.py index 13bfe01..8120e9e 100644 --- a/tests/test_FindTest.py +++ b/tests/test_FindTest.py @@ -828,7 +828,13 @@ def test_find_citations(self): # Fix for index error when searching for case name ("
State v. Luna-Benitez (S53965). Alternative writ issued, dismissed, 342 Or 255
", [case_citation(volume="342", reporter="Or", page="255")], - {'clean_steps': ['html', 'inline_whitespace']}) + {'clean_steps': ['html', 'inline_whitespace']}), + # Sequential full case citations. + ("West v. Atkins, 487 U.S. 42, 54-58 (1988), Polk Cty. v. Dodson, 454 U.S. 312, 325-26 (1981), and Monell v. Department of Soc. Servs., 436 U.S. 658, 694 (1978)", + [case_citation(volume="487", reporter="U.S.", page="42", metadata={"plaintiff": "West", "defendant": "Atkins", "year": "1988", "pin_cite": "54-58"}), + case_citation(volume="454", reporter="U.S.", page="312", metadata={"plaintiff": "Polk Cty.", "defendant": "Dodson", "year": "1981", "pin_cite": "325-26"}), + case_citation(volume="436", reporter="U.S.", page="658", metadata={"plaintiff": "Monell", "defendant": "Department of Soc. Servs.", "year": "1978", "pin_cite": "694"}), + ]), ) # fmt: on