From 697590fe8154fcdd877aa2de29d77f0feaf87227 Mon Sep 17 00:00:00 2001 From: Brandon Liu Date: Wed, 16 Jul 2025 12:24:37 -0400 Subject: [PATCH 1/5] fix(citation): Ensure full_span is aligned for parallel citations --- eyecite/models.py | 2 ++ tests/test_FindTest.py | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/eyecite/models.py b/eyecite/models.py index 605a0f7..463fc1a 100644 --- a/eyecite/models.py +++ b/eyecite/models.py @@ -507,6 +507,8 @@ def is_parallel_citation(self, preceding: CaseCitation): # California style may have a year prior to citation; merge as well self.metadata.year = preceding.metadata.year self.year = preceding.year + # Parallel citations should have the same full_span_end + self.full_span_end = preceding.full_span_end @dataclass(eq=True, unsafe_hash=True) class Metadata(CaseCitation.Metadata): diff --git a/tests/test_FindTest.py b/tests/test_FindTest.py index 13bfe01..32caef5 100644 --- a/tests/test_FindTest.py +++ b/tests/test_FindTest.py @@ -828,7 +828,7 @@ def test_find_citations(self): # Fix for index error when searching for case name ("

State v. Luna-Benitez (S53965). Alternative writ issued, dismissed, 342 Or 255

", [case_citation(volume="342", reporter="Or", page="255")], - {'clean_steps': ['html', 'inline_whitespace']}) + {'clean_steps': ['html', 'inline_whitespace']}), ) # fmt: on @@ -1194,6 +1194,22 @@ def test_citation_fullspan(self): extracted.full_span(), (start_idx, len(sentence)), error_msg ) + def test_parallel_full_span(self): + """Parallel citations should have the same full_span_end + + Note: it seems that the full_span_end can sometimes differ for a parallel + citation due to the way POST_FULL_CITATION_REGEX is defined. Under certain + conditions, it can end up matching to the next citation as opposed to the end of + the current citation. However, we can trust that the post citation matching + worked correctly for the first of the parallel citations. + """ + text = "Kaiser Steel Corp. v. W.S. Ranch Co., 391 U.S. 593, 598, 88 S. Ct. 1753, 20 L.Ed.2d 835 (1968). We have previously held that the automatic stay provisions of the Bankruptcy Code may toll the statute of limitations under the Warsaw Convention, which is the precursor to the Montreal Convention. See Zicherman v. Korean Air Lines Co., Ltd., 516 F.3d 1237, 1254 (11th Cir. 
2008)" + citations = get_citations(text) + full_span_end = citations[0].full_span_end + self.assertEqual(citations[0].full_span_end, full_span_end) + self.assertEqual(citations[1].full_span_end, full_span_end) + self.assertEqual(citations[2].full_span_end, full_span_end) + def test_reference_extraction_using_resolved_names(self): """Can we extract a reference citation using resolved metadata?""" texts = [ From c9300e9b7e9df06d9c4e1f9e5d7923f8f7b87132 Mon Sep 17 00:00:00 2001 From: Brandon Liu Date: Wed, 16 Jul 2025 12:26:21 -0400 Subject: [PATCH 2/5] CHANGES --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 8e4ab05..336a448 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -12,6 +12,7 @@ Changes: Fixes: - Modifies rendering of AhocorasickTokenizer parameter in API docs II +- Fixes full_span_end not always being the same among parallel citations ## Current From 8ea0c3044f3b16f611b404f419264fa26c3ba6fd Mon Sep 17 00:00:00 2001 From: Brandon Liu Date: Wed, 16 Jul 2025 13:20:13 -0400 Subject: [PATCH 3/5] Another fix --- eyecite/models.py | 2 +- eyecite/regexes.py | 2 +- tests/test_FindTest.py | 9 ++++----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/eyecite/models.py b/eyecite/models.py index 463fc1a..f86cb9a 100644 --- a/eyecite/models.py +++ b/eyecite/models.py @@ -508,7 +508,7 @@ def is_parallel_citation(self, preceding: CaseCitation): self.metadata.year = preceding.metadata.year self.year = preceding.year # Parallel citations should have the same full_span_end - self.full_span_end = preceding.full_span_end + # self.full_span_end = preceding.full_span_end @dataclass(eq=True, unsafe_hash=True) class Metadata(CaseCitation.Metadata): diff --git a/eyecite/regexes.py b/eyecite/regexes.py index df1cddd..540c27c 100644 --- a/eyecite/regexes.py +++ b/eyecite/regexes.py @@ -308,7 +308,7 @@ def reference_pin_cite_re(regexes): [\(\[] # opening paren or bracket (?: (?: - (?P<court>.*?) 
# treat anything before date as court + (?P<court>[^)]*?) # treat anything before date as court (?= # lookahead to stop when we see a month or year \s+{MONTH_REGEX} | \s+{YEAR_REGEX} diff --git a/tests/test_FindTest.py b/tests/test_FindTest.py index 32caef5..8ec20a1 100644 --- a/tests/test_FindTest.py +++ b/tests/test_FindTest.py @@ -1203,12 +1203,11 @@ def test_parallel_full_span(self): the current citation. However, we can trust that the post citation matching worked correctly for the first of the parallel citations. """ - text = "Kaiser Steel Corp. v. W.S. Ranch Co., 391 U.S. 593, 598, 88 S. Ct. 1753, 20 L.Ed.2d 835 (1968). We have previously held that the automatic stay provisions of the Bankruptcy Code may toll the statute of limitations under the Warsaw Convention, which is the precursor to the Montreal Convention. See Zicherman v. Korean Air Lines Co., Ltd., 516 F.3d 1237, 1254 (11th Cir. 2008)" + text = "Kaiser Steel Corp. v. W.S. Ranch Co., 391 U.S. 593, 598, 88 S. Ct. 1753, 20 L.Ed.2d 835 (1968). We have previously held that the automatic stay provisions of the Bankruptcy Code may toll the statute the Montreal Convention. See Zicherman v. Korean Air Lines Co., Ltd., 516 F.3d 1237, 1254 (11th Cir. 
2008)" citations = get_citations(text) - full_span_end = citations[0].full_span_end - self.assertEqual(citations[0].full_span_end, full_span_end) - self.assertEqual(citations[1].full_span_end, full_span_end) - self.assertEqual(citations[2].full_span_end, full_span_end) + self.assertEqual(citations[0].full_span_end, 94) + self.assertEqual(citations[1].full_span_end, 94) + self.assertEqual(citations[2].full_span_end, 94) def test_reference_extraction_using_resolved_names(self): """Can we extract a reference citation using resolved metadata?""" From 746a35d69cbe8eb605b5b2bb4744509fb19fb16b Mon Sep 17 00:00:00 2001 From: Brandon Liu Date: Wed, 16 Jul 2025 13:25:24 -0400 Subject: [PATCH 4/5] Remove commented out code --- eyecite/models.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/eyecite/models.py b/eyecite/models.py index f86cb9a..605a0f7 100644 --- a/eyecite/models.py +++ b/eyecite/models.py @@ -507,8 +507,6 @@ def is_parallel_citation(self, preceding: CaseCitation): # California style may have a year prior to citation; merge as well self.metadata.year = preceding.metadata.year self.year = preceding.year - # Parallel citations should have the same full_span_end - # self.full_span_end = preceding.full_span_end @dataclass(eq=True, unsafe_hash=True) class Metadata(CaseCitation.Metadata): From 60142d99bd31232abdd0718c6eb40a950a3a4cfe Mon Sep 17 00:00:00 2001 From: Brandon Liu Date: Wed, 16 Jul 2025 13:28:43 -0400 Subject: [PATCH 5/5] Add closing bracket as well --- eyecite/regexes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eyecite/regexes.py b/eyecite/regexes.py index 540c27c..d3e8051 100644 --- a/eyecite/regexes.py +++ b/eyecite/regexes.py @@ -308,7 +308,7 @@ def reference_pin_cite_re(regexes): [\(\[] # opening paren or bracket (?: (?: - (?P<court>[^)]*?) # treat anything before date as court + (?P<court>[^)\]]*?) # treat anything before date as court (?= # lookahead to stop when we see a month or year \s+{MONTH_REGEX} | \s+{YEAR_REGEX}