Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Changes:

Fixes:
- Modifies rendering of AhocorasickTokenizer parameter in API docs II
- Fixes full_span_end not always being the same among parallel citations

## Current

Expand Down
2 changes: 1 addition & 1 deletion eyecite/regexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def reference_pin_cite_re(regexes):
[\(\[] # opening paren or bracket
(?:
(?:
(?P<court>.*?) # treat anything before date as court
(?P<court>[^)\]]*?) # treat anything before date as court
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using only parentheses () is sufficient. I think square brackets [] are not needed in any current scenario.

(?= # lookahead to stop when we see a month or year
\s+{MONTH_REGEX} |
\s+{YEAR_REGEX}
Expand Down
17 changes: 16 additions & 1 deletion tests/test_FindTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,7 @@ def test_find_citations(self):
# Fix for index error when searching for case name
("<p>State v. Luna-Benitez (S53965). Alternative writ issued, dismissed, 342 Or 255</p>",
[case_citation(volume="342", reporter="Or", page="255")],
{'clean_steps': ['html', 'inline_whitespace']})
{'clean_steps': ['html', 'inline_whitespace']}),
)

# fmt: on
Expand Down Expand Up @@ -1194,6 +1194,21 @@ def test_citation_fullspan(self):
extracted.full_span(), (start_idx, len(sentence)), error_msg
)

def test_parallel_full_span(self):
    """Check that every parallel citation reports one shared full_span_end.

    Background: full_span_end appears to be able to drift between the
    members of a parallel citation because of how POST_FULL_CITATION_REGEX
    is written. In some situations the post-citation match seems to run on
    to the following citation rather than stopping where the current one
    ends. The post-citation match for the first member of the parallel
    group is the one that can be relied upon.
    """
    sentence = (
        "Kaiser Steel Corp. v. W.S. Ranch Co., 391 U.S. 593, 598, "
        "88 S. Ct. 1753, 20 L.Ed.2d 835 (1968). "
        "We have previously held that the automatic stay provisions of "
        "the Bankruptcy Code may toll the statute the Montreal Convention. "
        "See Zicherman v. Korean Air Lines Co., Ltd., "
        "516 F.3d 1237, 1254 (11th Cir. 2008)"
    )
    found = get_citations(sentence)
    # Offset 94 sits just past the closing parenthesis of "(1968)".
    expected_end = 94
    # The first three citations are the parallel group; index each one
    # explicitly so a short result list still raises IndexError.
    for position in range(3):
        self.assertEqual(found[position].full_span_end, expected_end)

def test_reference_extraction_using_resolved_names(self):
"""Can we extract a reference citation using resolved metadata?"""
texts = [
Expand Down
Loading