Skip to content

Commit

Permalink
fix(llm): allow lang key to not exist
Browse files Browse the repository at this point in the history
  • Loading branch information
nsantacruz committed Feb 18, 2024
1 parent 312b79a commit 26d07cb
Showing 1 changed file with 17 additions and 11 deletions.
28 changes: 17 additions & 11 deletions sefaria/helper/llm/topic_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,21 @@ def _lang_dict_by_func(func: Callable[[str], Any]):
return {lang: func(lang) for lang in ('en', 'he')}


def _get_commentary_from_link_dict(link_dict: dict) -> Optional[dict]:
if link_dict['category'] not in {'Commentary'}:
return
if not link_dict['sourceHasEn']:
return
commentary = {
"ref": link_dict['sourceRef'],
"text": _lang_dict_by_func(
lambda lang: JaggedTextArray(link_dict.get('text' if lang == 'en' else 'he', '')).flatten_to_string()),
}
commentary['text'] = _lang_dict_by_func(
lambda lang: re.sub(r"<[^>]+>", " ", TextChunk.strip_itags(commentary['text'][lang])))
return commentary


def _get_commentary_for_tref(tref: str) -> List[dict]:
"""
Return list of commentary for tref. Currently only considers English commentary.
Expand All @@ -25,17 +40,8 @@ def _get_commentary_for_tref(tref: str) -> List[dict]:
commentary = []

for link_dict in get_links(tref, with_text=True):
if link_dict['category'] not in {'Commentary'}:
continue
if not link_dict['sourceHasEn']:
continue
temp_commentary = {
"ref": link_dict['sourceRef'],
"text": _lang_dict_by_func(
lambda lang: JaggedTextArray(link_dict['text' if lang == 'en' else 'he']).flatten_to_string()),
}
temp_commentary['text'] = _lang_dict_by_func(
lambda lang: re.sub(r"<[^>]+>", " ", TextChunk.strip_itags(temp_commentary['text'][lang])))
temp_commentary = _get_commentary_from_link_dict(link_dict)
if not temp_commentary: continue
commentary += [temp_commentary]
return commentary

Expand Down

0 comments on commit 26d07cb

Please sign in to comment.