Skip to content

Commit

Permalink
fix(api): reduce code redundancies, update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
saengel committed Jul 10, 2024
1 parent 676a1fb commit da70c51
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 11 deletions.
2 changes: 1 addition & 1 deletion sefaria/model/tests/text_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,7 +817,7 @@ def test_remove_html():
pasuk_with_html = "</big>בּ<big>ְרֵאשִׁ֖ית בָּרָ֣א אֱלֹהִ֑ים אֵ֥ת הַשָּׁמַ֖יִם וְאֵ֥ת הָאָֽרֶץ"
pasuk_without_html = "בְּרֵאשִׁ֖ית בָּרָ֣א אֱלֹהִ֑ים אֵ֥ת הַשָּׁמַ֖יִם וְאֵ֥ת הָאָֽרֶץ"

pasuk_with_br = "Happy is the <big>man</big> who has not followed the counsel of the wicked,<br>or taken the path of sinners,<br>or joined the company of the insolent;"
pasuk_with_br = "Happy is the <big>man</big> who has not followed the counsel of the wicked,<br/>or taken the path of sinners,<br/>or joined the company of the insolent;"
pasuk_without_br = "Happy is the man who has not followed the counsel of the wicked, or taken the path of sinners, or joined the company of the insolent;"

assert model.TextChunk.remove_html(pasuk_with_html) == pasuk_without_html
Expand Down
19 changes: 9 additions & 10 deletions sefaria/model/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -1171,22 +1171,21 @@ def sanitize_text(cls, t):

@staticmethod
def remove_html(t):
    """
    Strip HTML tags from a string, or from every string inside a (possibly nested) list.

    Line-break tags (`<br>`, `<br/>`, `<br />`, in any letter case) are replaced by a
    single space so the words on either side of the break stay separated.
    Every other tag is removed outright.

    :param t: a string, or a list whose items are strings or further lists
    :return: the cleaned string when `t` is a string; the same list object, cleaned
        in place, when `t` is a list; False when `t` is neither. List items that are
        neither strings nor lists are replaced by False.
    """
    tag_pattern = re.compile(r'<[^>]+>')
    # Accept every common spelling of a line break, not only the literal "<br/>".
    line_break_pattern = re.compile(r'<br\s*/?>', re.IGNORECASE)

    def conditional_replace(match):
        # A line break separates words, so it must leave a space behind.
        if line_break_pattern.fullmatch(match.group()):
            return " "
        return ""

    def strip_tags(value):
        if isinstance(value, str):
            return tag_pattern.sub(conditional_replace, value)
        if isinstance(value, list):
            # Lists are cleaned in place so callers holding a reference see the result.
            for i, item in enumerate(value):
                value[i] = strip_tags(item)
            return value
        return False

    return strip_tags(t)
Expand Down

0 comments on commit da70c51

Please sign in to comment.