refactor(linker): break up map_new_indices so it distinguishes between mapping main span and part spans.
nsantacruz committed Oct 26, 2023
1 parent 4a3797a commit 9d8dc2a
Showing 1 changed file with 12 additions and 5 deletions.
17 changes: 12 additions & 5 deletions sefaria/model/linker/ref_part.py
@@ -316,6 +316,13 @@ def __init__(self, span: SpanOrToken, type: NamedEntityType, **cloneable_kwargs)
         self.span = span
         self.type = type
 
+    def map_new_indices(self, new_doc: Doc, new_indices: Tuple[int, int]) -> None:
+        """
+        Remap self.span to new indices
+        """
+        self.span = new_doc.char_span(*new_indices)
+        if self.span is None: raise InputError(f"${new_indices} don't match token boundaries. Using 'expand' alignment mode text is '{new_doc.char_span(*new_indices, alignment_mode='expand')}'")
+
     @property
     def text(self):
         """
@@ -447,13 +454,13 @@ def split_part(self, part: RawRefPart, str_end) -> Tuple['RawRef', RawRefPart, RawRefPart]
         new_parts_to_match = self.parts_to_match
         return self.clone(raw_ref_parts=new_parts, parts_to_match=new_parts_to_match), apart, bpart
 
-    def map_new_indices(self, new_doc: Doc, new_indices: Tuple[int, int], new_part_indices: List[Tuple[int, int]]) -> None:
+    def map_new_part_indices(self, new_part_indices: List[Tuple[int, int]]) -> None:
         """
         Remap self.span and all spans of parts to new indices
         """
-        self.span = new_doc.char_span(*new_indices)
-        if self.span is None: raise InputError(f"${new_indices} don't match token boundaries. Using 'expand' alignment mode text is '{new_doc.char_span(*new_indices, alignment_mode='expand')}'")
+        start_char, _ = self.char_indices
         doc_span = self.span.as_doc()
         for part, temp_part_indices in zip(self.raw_ref_parts, new_part_indices):
-            part.span = doc_span.char_span(*[i-new_indices[0] for i in temp_part_indices])
-            if part.span is None: raise InputError(f"{temp_part_indices} doesn't match token boundaries for part {part}. Using 'expand' alignment mode text is '{new_doc.char_span(*temp_part_indices, alignment_mode='expand')}'")
+            part.span = doc_span.char_span(*[i-start_char for i in temp_part_indices])
+            if part.span is None:
+                raise InputError(f"{temp_part_indices} doesn't match token boundaries for part {part}.")

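Taken together, the old single method presumably becomes a two-step call sequence: the main span is remapped first (via map_new_indices, which RawRef appears to inherit from the named-entity class touched in the first hunk), and only then can map_new_part_indices resolve each part relative to the freshly remapped span, since it derives start_char from self.char_indices. A rough caller-side sketch under those assumptions; raw_ref, new_doc, and the offsets are illustrative placeholders, not values from this repository:

# Illustrative only: the objects and character offsets below are assumed, not taken
# from the commit. Order appears to matter -- map_new_part_indices reads
# self.char_indices, which presumably reflects self.span, so the main span must
# be remapped before the part spans.
new_indices = (100, 125)                     # assumed char range of the whole citation in new_doc
new_part_indices = [(100, 108), (109, 125)]  # assumed char ranges of each ref part in new_doc

raw_ref.map_new_indices(new_doc, new_indices)      # remap the main span against the new Doc
raw_ref.map_new_part_indices(new_part_indices)     # remap each part, offset by the span's start_char

Splitting the method this way also drops the new_doc and new_indices parameters from the part-level remapping, so the span-only version can live on the base named-entity class, where plain named entities presumably have no parts to remap.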