refactor(linker): break up map_new_indices so it distinguishes between mapping main span and part spans.
nsantacruz committed Oct 26, 2023
1 parent 4a3797a commit 9d8dc2a
Showing 1 changed file with 12 additions and 5 deletions.
17 changes: 12 additions & 5 deletions sefaria/model/linker/ref_part.py
@@ -316,6 +316,13 @@ def __init__(self, span: SpanOrToken, type: NamedEntityType, **cloneable_kwargs)
         self.span = span
         self.type = type
 
+    def map_new_indices(self, new_doc: Doc, new_indices: Tuple[int, int]) -> None:
+        """
+        Remap self.span to new indices
+        """
+        self.span = new_doc.char_span(*new_indices)
+        if self.span is None: raise InputError(f"${new_indices} don't match token boundaries. Using 'expand' alignment mode text is '{new_doc.char_span(*new_indices, alignment_mode='expand')}'")
+
     @property
     def text(self):
         """
@@ -447,13 +454,13 @@ def split_part(self, part: RawRefPart, str_end) -> Tuple['RawRef', RawRefPart, RawRefPart]
         new_parts_to_match = self.parts_to_match
         return self.clone(raw_ref_parts=new_parts, parts_to_match=new_parts_to_match), apart, bpart
 
-    def map_new_indices(self, new_doc: Doc, new_indices: Tuple[int, int], new_part_indices: List[Tuple[int, int]]) -> None:
+    def map_new_part_indices(self, new_part_indices: List[Tuple[int, int]]) -> None:
         """
         Remap self.span and all spans of parts to new indices
         """
-        self.span = new_doc.char_span(*new_indices)
-        if self.span is None: raise InputError(f"${new_indices} don't match token boundaries. Using 'expand' alignment mode text is '{new_doc.char_span(*new_indices, alignment_mode='expand')}'")
+        start_char, _ = self.char_indices
         doc_span = self.span.as_doc()
         for part, temp_part_indices in zip(self.raw_ref_parts, new_part_indices):
-            part.span = doc_span.char_span(*[i-new_indices[0] for i in temp_part_indices])
-            if part.span is None: raise InputError(f"{temp_part_indices} doesn't match token boundaries for part {part}. Using 'expand' alignment mode text is '{new_doc.char_span(*temp_part_indices, alignment_mode='expand')}'")
+            part.span = doc_span.char_span(*[i-start_char for i in temp_part_indices])
+            if part.span is None:
+                raise InputError(f"{temp_part_indices} doesn't match token boundaries for part {part}.")

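Taken together, the old single method presumably becomes a two-step call sequence: the main span is remapped first (via map_new_indices, which RawRef appears to inherit from the named-entity class touched in the first hunk), and only then can map_new_part_indices resolve each part relative to the freshly remapped span, since it derives start_char from self.char_indices. A rough caller-side sketch under those assumptions; raw_ref, new_doc, and the offsets are illustrative placeholders, not values from this repository:

# Illustrative only: the objects and character offsets below are assumed, not taken
# from the commit. Order appears to matter -- map_new_part_indices reads
# self.char_indices, which presumably reflects self.span, so the main span must
# be remapped before the part spans.
new_indices = (100, 125)                     # assumed char range of the whole citation in new_doc
new_part_indices = [(100, 108), (109, 125)]  # assumed char ranges of each ref part in new_doc

raw_ref.map_new_indices(new_doc, new_indices)      # remap the main span against the new Doc
raw_ref.map_new_part_indices(new_part_indices)     # remap each part, offset by the span's start_char

Splitting the method this way also drops the new_doc and new_indices parameters from the part-level remapping, so the span-only version can live on the base named-entity class, where plain named entities presumably have no parts to remap.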