Skip to content

Commit

Permalink
fix(linker): fully support ref being None in ResolvedRef. In addition, better logic for removing duplicate refs.
Browse files Browse the repository at this point in the history
  • Loading branch information
nsantacruz committed Nov 22, 2023
1 parent 7761eb2 commit befd778
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions sefaria/model/linker/ref_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,13 @@ def is_match_correct(match: ResolvedRef) -> bool:

@staticmethod
def remove_superfluous_matches(thoroughness: ResolutionThoroughness, resolved_refs: List[ResolvedRef]) -> List[ResolvedRef]:
# make matches with refs that are essentially equivalent (i.e. refs cover same span) actually equivalent
resolved_refs.sort(key=lambda x: x.ref and x.ref.order_id())
for i, r in enumerate(resolved_refs[:-1]):
next_r = resolved_refs[i+1]
if r.ref.contains(next_r.ref) and next_r.ref.contains(r.ref):
next_r.ref = r.ref

# make unique
resolved_refs = list({r.ref: r for r in resolved_refs}.values())
if thoroughness >= ResolutionThoroughness.HIGH or len(resolved_refs) > 1:
Expand Down Expand Up @@ -706,15 +713,15 @@ def _merge_subset_matches(resolved_refs: List[ResolvedRef]) -> List[ResolvedRef]
Merge matches where one ref is contained in another ref
E.g. if matchA.ref == Ref("Genesis 1") and matchB.ref == Ref("Genesis 1:1"), matchA will be deleted and its parts will be appended to matchB's parts
"""
resolved_refs.sort(key=lambda x: x.ref and x.ref.order_id())
resolved_refs.sort(key=lambda x: "N/A" if x.ref is None else x.ref.order_id())
merged_resolved_refs = []
next_merged = False
for imatch, match in enumerate(resolved_refs[:-1]):
if match.is_ambiguous or match.ref is None or next_merged:
next_match = resolved_refs[imatch+1]
if match.is_ambiguous or match.ref is None or next_match.ref is None or next_merged:
merged_resolved_refs += [match]
next_merged = False
continue
next_match = resolved_refs[imatch+1]
if match.ref.index.title != next_match.ref.index.title:
# optimization, the easiest cases to check for
merged_resolved_refs += [match]
Expand Down

0 comments on commit befd778

Please sign in to comment.