Skip to content

Commit

Permalink
feat(linker): add functions to linker_index_converter.py that will ai…
Browse files Browse the repository at this point in the history
…d adding new indexes to linker.
  • Loading branch information
nsantacruz committed Nov 22, 2023
1 parent fa0045a commit 0eaab15
Showing 1 changed file with 38 additions and 6 deletions.
44 changes: 38 additions & 6 deletions sefaria/helper/linker_index_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,17 @@ def create_term(self, **kwargs):
self.context_and_primary_title_to_term[(kwargs.get('context'), term.get_primary_title('en'))] = term
return term

def get_or_create_term_for_titled_obj(
    self, obj, context=None, new_alt_titles=None, title_modifier=None, title_adder=None
):
    """
    Return a term for `obj`, reusing an existing one when the lookup finds it.

    The lookup is delegated to `get_existing_term_for_titled_obj`; when that
    returns a falsy value, a new term is built with `create_term_from_titled_obj`.

    :param obj: titled object, passed through unchanged to both helpers
    :param context: only forwarded to `create_term_from_titled_obj`; it plays no
        part in the lookup of an existing term
    :param new_alt_titles: forwarded to both helpers
    :param title_modifier: forwarded to both helpers
    :param title_adder: forwarded to both helpers
    :return: the existing term if one was found, otherwise the newly created term
    """
    existing = self.get_existing_term_for_titled_obj(obj, new_alt_titles, title_modifier, title_adder)
    if existing:
        return existing
    # Nothing matched, so fall back to creating a fresh term.
    return self.create_term_from_titled_obj(obj, context, new_alt_titles, title_modifier, title_adder)

def get_existing_term_for_titled_obj(self, obj, new_alt_titles=None, title_modifier=None, title_adder=None):
    """
    Look up a NonUniqueTerm carrying every title derived from `obj`.

    The titles are computed by `_make_titles_for_term` (primary English and
    Hebrew titles plus both alternate-title lists) and all of them must appear
    in the term's `titles.text` values for it to match.

    :param obj: titled object, passed to `_make_titles_for_term`
    :param new_alt_titles: passed to `_make_titles_for_term`
    :param title_modifier: passed to `_make_titles_for_term`
    :param title_adder: passed to `_make_titles_for_term`
    :return: whatever `NonUniqueTerm().load` returns for the query
        (presumably a falsy value when nothing matches -- confirm against `load`)
    """
    primary_en, primary_he, extra_en, extra_he = self._make_titles_for_term(
        obj, new_alt_titles, title_modifier, title_adder
    )
    required_titles = [primary_en, primary_he] + extra_en + extra_he
    # `$all` requires every listed title to be present on the same term.
    query = {"titles.text": {"$all": required_titles}}
    return NonUniqueTerm().load(query)

def create_term_from_titled_obj(self, obj, context=None, new_alt_titles=None, title_modifier=None, title_adder=None):
"""
Create a NonUniqueTerm from 'titled object' (see explanation of `obj` param)
Expand Down Expand Up @@ -103,6 +114,15 @@ def title_adder(lang, title):
...
"""
en_title, he_title, alt_en_titles, alt_he_titles = self._make_titles_for_term(obj, new_alt_titles,
title_modifier, title_adder)
term = self.create_term(en=en_title, he=he_title, context=context, alt_en=alt_en_titles, alt_he=alt_he_titles)
if isinstance(obj, Term):
self.old_term_map[obj.name] = term
return term

@staticmethod
def _make_titles_for_term(obj, new_alt_titles=None, title_modifier=None, title_adder=None):
new_alt_titles = new_alt_titles or []
title_group = obj if isinstance(obj, TitleGroup) else obj.title_group
en_title = title_group.primary_title('en')
Expand All @@ -128,10 +148,7 @@ def title_adder(lang, title):
# make unique
alt_en_titles = list(set(alt_en_titles))
alt_he_titles = list(set(alt_he_titles))
term = self.create_term(en=en_title, he=he_title, context=context, alt_en=alt_en_titles, alt_he=alt_he_titles)
if isinstance(obj, Term):
self.old_term_map[obj.name] = term
return term
return en_title, he_title, alt_en_titles, alt_he_titles


class LinkerCategoryConverter:
Expand Down Expand Up @@ -375,14 +392,26 @@ def _update_lengths(self):
outer_shape = base_outer_shape
self.index.nodes.lengths = [outer_shape] + ac[1:]

@staticmethod
def get_all_alt_struct_nodes(index):
def alt_struct_nodes_helper(node, nodes):
nodes.append(node)
for child in node.children:
alt_struct_nodes_helper(child, nodes)

nodes = []
for node in index.get_alt_struct_roots():
alt_struct_nodes_helper(node, nodes)
return nodes

def convert(self):
if self.get_alt_structs:
alt_struct_dict = self.get_alt_structs(self.index)
if alt_struct_dict:
for name, root in alt_struct_dict.items():
self.index.set_alt_structure(name, root)
self._traverse_nodes(self.index.nodes, self.node_visitor, is_alt_node=False)
alt_nodes = self.index.get_alt_struct_leaves()
alt_nodes = self.get_all_alt_struct_nodes(self.index)
for inode, node in enumerate(alt_nodes):
self.node_visitor(node, 1, inode, len(alt_nodes), True)
self._update_lengths() # update lengths for good measure
Expand Down Expand Up @@ -425,4 +454,7 @@ def node_visitor(self, node, depth, isibling, num_siblings, is_alt_node):
if other_fields_dict is not None:
for key, val in other_fields_dict.items():
if val is None: continue
setattr(node, key, val)
if val == "DELETE!":
delattr(node, key)
else:
setattr(node, key, val)

0 comments on commit 0eaab15

Please sign in to comment.