Skip to content

Commit

Permalink
Merge branch 'master' into apidocs
Browse files Browse the repository at this point in the history
  • Loading branch information
rneiss committed Jan 4, 2024
2 parents 213cd20 + e06cc4c commit bbb7dad
Show file tree
Hide file tree
Showing 116 changed files with 3,440 additions and 1,870 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/continuous.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -309,12 +309,10 @@ jobs:
| sed 's/[^a-z0-9\.\-]//g')
>> $GITHUB_OUTPUT
- name: Start Job
run: envsubst '${GITHUB_RUN_ID},${DEPLOY_ENV},${WEB_IMAGE_NAME},${WEB_IMAGE_TAG},${TIMESTAMP}' < ./build/ci/pyTestPod.yaml | kubectl apply -f -
run: ./build/ci/createJobFromRollout.sh $GITHUB_RUN_ID $DEPLOY_ENV
env:
# dependent on GITHUB_RUN_ID, which is implicitly passed in
DEPLOY_ENV: sandbox-${{ steps.get-sha.outputs.sha_short }}
WEB_IMAGE_NAME: us-east1-docker.pkg.dev/${{secrets.DEV_PROJECT}}/containers/sefaria-web-${{ steps.branch-name.outputs.current_branch }}
WEB_IMAGE_TAG: sha-${{ steps.get-sha.outputs.sha_short }}
- name: Wait For Job To Finish
run: ./build/ci/waitForCIJob.bash
timeout-minutes: 60
Expand Down
62 changes: 62 additions & 0 deletions api/api_warnings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import django
django.setup()
from sefaria.model import *
from typing import List
from enum import Enum

class APIWarningCode(Enum):
    """Numeric codes attached to the data warnings returned by the API."""

    # A specific version title was requested but is not available.
    APINoVersion = 101
    # No version exists in the requested language.
    APINoLanguageVersion = 102
    # The source text is missing.
    APINoSourceText = 103
    # No translation is available.
    APINoTranslationText = 104

"""
Classes for data warnings in API calls.
Used when part of the requested data exists and is returned, while another part is missing.
"""

class APIDatawarning:
    """Common base class for all API data warnings; carries no state itself."""

    def __init__(self):
        """Nothing to initialise at this level."""


class TextsAPIResponseMessage(APIDatawarning):
    """
    Warning that can be rendered as a warning code together with a message.

    ``get_message`` reads ``self.warning_code`` and ``self.message``; neither
    is set by this class itself.
    """

    def get_message(self) -> dict:
        """Return a dict holding the numeric ``warning_code`` and the ``message``."""
        payload = {}
        payload['warning_code'] = self.warning_code.value
        payload['message'] = self.message
        return payload


class APINoVersion(TextsAPIResponseMessage):
    """Warning for a requested version title that is missing in the given language."""

    def __init__(self, oref: Ref, vtitle: str, lang: str):
        """Store the warning code and the message naming ``vtitle``, ``lang`` and ``oref``."""
        code = APIWarningCode.APINoVersion
        text = f'We do not have version named {vtitle} with language {lang} for {oref}'
        self.warning_code, self.message = code, text


class APINoLanguageVersion(TextsAPIResponseMessage):
    """Warning for a requested language that has no version; lists the available ones."""

    def __init__(self, oref: Ref, langs: List[str]):
        """Store the warning code and the message naming ``oref`` and ``langs``."""
        code = APIWarningCode.APINoLanguageVersion
        text = f'We do not have the language you asked for {oref}. Available languages are {langs}'
        self.warning_code, self.message = code, text


class APINoSourceText(TextsAPIResponseMessage):
    """Warning for a ref whose source text is missing."""

    def __init__(self, oref: Ref):
        """Store the warning code and the message naming ``oref``."""
        code = APIWarningCode.APINoSourceText
        text = f'We do not have the source text for {oref}'
        self.warning_code, self.message = code, text


class APINoTranslationText(TextsAPIResponseMessage):
    """Warning for a ref that has no translation."""

    def __init__(self, oref: Ref):
        """Store the warning code and the message naming ``oref``."""
        code = APIWarningCode.APINoTranslationText
        text = f'We do not have a translation for {oref}'
        self.warning_code, self.message = code, text
218 changes: 218 additions & 0 deletions api/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
from django.test.client import Client
import django
django.setup()
from reader.tests import SefariaTestCase
import json
from api.api_warnings import APIWarningCode


# Module-level Django test client; every test below issues its requests through it.
c = Client()


class APITextsTests(SefariaTestCase):
    """
    Tests for the ``/api/v3/texts/`` endpoint.

    Each test sends a GET request through the module-level client ``c`` and
    checks the status code and/or the decoded JSON body. Specific assert
    methods (``assertEqual``, ``assertGreater``, ``assertIn``) are used rather
    than ``assertTrue(<comparison>)`` so that failures report the actual values.
    """

    def test_api_get_text_default(self):
        response = c.get('/api/v3/texts/Genesis.1')
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(len(data["versions"]), 1)
        self.assertEqual(data["versions"][0]['actualLanguage'], 'he')
        self.assertEqual(data["book"], "Genesis")
        self.assertEqual(data["categories"], ["Tanakh", "Torah"])
        self.assertEqual(data["sections"], ['1'])
        self.assertEqual(data["toSections"], ['1'])

    def test_api_get_text_source_all(self):
        response = c.get('/api/v3/texts/Shabbat.22a?version=source|all')
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertGreater(len(data["versions"]), 1)
        self.assertTrue(all(v['actualLanguage'] == 'he' for v in data["versions"]))
        self.assertEqual(data["book"], "Shabbat")
        self.assertEqual(data["categories"], ["Talmud", "Bavli", "Seder Moed"])
        self.assertEqual(data["sections"], ["22a"])
        self.assertEqual(data["toSections"], ["22a"])

    def test_api_get_text_source(self):
        response = c.get('/api/v3/texts/Shabbat.22a?version=source')
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(len(data["versions"]), 1)
        self.assertEqual(data["versions"][0]['versionTitle'], "William Davidson Edition - Vocalized Aramaic")

    def test_api_get_text_translation_all(self):
        response = c.get('/api/v3/texts/Shabbat.22a?version=translation|all')
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertGreater(len(data["versions"]), 1)
        self.assertTrue(any(v['actualLanguage'] == 'en' for v in data["versions"]))
        self.assertEqual(data["book"], "Shabbat")
        self.assertEqual(data["categories"], ["Talmud", "Bavli", "Seder Moed"])
        self.assertEqual(data["sections"], ["22a"])
        self.assertEqual(data["toSections"], ["22a"])

    def test_api_get_text_translation(self):
        response = c.get('/api/v3/texts/Shabbat.22a?version=translation')
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(len(data["versions"]), 1)
        self.assertEqual(data["versions"][0]['versionTitle'], "William Davidson Edition - English")

    def test_api_get_text_lang_all(self):
        response = c.get('/api/v3/texts/Rashi_on_Genesis.2.3?version=english|all')
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertGreater(len(data["versions"]), 1)
        self.assertTrue(all(v['actualLanguage'] == 'en' for v in data["versions"]))
        self.assertEqual(data["book"], "Rashi on Genesis")
        self.assertEqual(data["collectiveTitle"], "Rashi")
        self.assertEqual(data["categories"], ["Tanakh", "Rishonim on Tanakh", "Rashi", "Torah"])
        self.assertEqual(data["sections"], ['2', '3'])
        self.assertEqual(data["toSections"], ['2', '3'])

    def test_api_get_text_specific(self):
        response = c.get('/api/v3/texts/Tosafot_on_Sukkah.2a.4.1?version=hebrew|Vilna_Edition')
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(len(data["versions"]), 1)
        self.assertEqual(data["versions"][0]['actualLanguage'], 'he')
        self.assertEqual(data["versions"][0]['versionTitle'], 'Vilna Edition')
        self.assertEqual(data["book"], "Tosafot on Sukkah")
        self.assertEqual(data["collectiveTitle"], "Tosafot")
        self.assertEqual(data["categories"], ["Talmud", "Bavli", "Rishonim on Talmud", "Tosafot", "Seder Moed"])
        self.assertEqual(data["sections"], ["2a", '4', '1'])
        self.assertEqual(data["toSections"], ["2a", '4', '1'])

    def test_api_get_text_primary_all(self):
        response = c.get('/api/v3/texts/Genesis.1?version=primary|all')
        data = json.loads(response.content)
        self.assertGreater(len(data["versions"]), 3)
        self.assertTrue(all(v['actualLanguage'] == 'he' for v in data["versions"]))

    def test_api_get_text_primary(self):
        response = c.get('/api/v3/texts/Shabbat.22a?version=primary')
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(len(data["versions"]), 1)
        self.assertEqual(data["versions"][0]['versionTitle'], "William Davidson Edition - Vocalized Aramaic")

    def test_api_get_text_two_params(self):
        response = c.get('/api/v3/texts/Genesis.1?version=hebrew|Tanach with Nikkud&version=english|all')
        data = json.loads(response.content)
        self.assertGreater(len(data["versions"]), 7)
        # The explicitly requested Hebrew version comes first, then all English ones.
        self.assertEqual(data["versions"][0]['actualLanguage'], 'he')
        self.assertTrue(all(v['actualLanguage'] == 'en' for v in data["versions"][1:]))

    def test_api_get_text_range(self):
        response = c.get('/api/v3/texts/Job.5.2-4')
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(data["sections"], ['5', '2'])
        self.assertEqual(data["toSections"], ['5', '4'])

    # NOTE: the name must start with "test_" for the runner to collect it; the
    # former spelling "text_api_virtual_node" meant this check never ran.
    def test_api_virtual_node(self):
        response = c.get('/api/v3/texts/BDB, א')
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(len(data['versions']), 1)
        self.assertEqual(data['versions'][0]['text'], ['<big><span dir="rtl">א</span></big> <em>Āleph</em>, first letter; in post Biblical Hebrew = numeral 1 (and so in margin of printed MT); א̈= 1000; no evidence of this usage in OT times.'])

    def test_api_get_text_bad_text(self):
        response = c.get('/api/v3/texts/Life_of_Pi.13.13')
        self.assertEqual(400, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(data["error"], "Could not find title in reference: Life of Pi.13.13")

    def test_api_get_text_out_of_bound(self):
        response = c.get('/api/v3/texts/Genesis.999')
        data = json.loads(response.content)
        self.assertEqual(data["error"], "Genesis ends at Chapter 50.")

    def test_api_get_text_too_many_hyphens(self):
        response = c.get('/api/v3/texts/Genesis.9-4-5')
        data = json.loads(response.content)
        self.assertEqual(data["error"], "Couldn't understand ref 'Genesis.9-4-5' (too many -'s).")

    def test_api_get_text_bad_sections(self):
        response = c.get('/api/v3/texts/Job.6-X')
        self.assertEqual(400, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(data["error"], "Couldn't understand text sections: 'Job.6-X'.")

    def test_api_get_text_empty_ref(self):
        response = c.get("/api/v3/texts/Berakhot.1a")
        self.assertEqual(400, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(data["error"], "We have no text for Berakhot 1a.")

    def test_api_get_text_no_source(self):
        response = c.get("/api/v3/texts/The_Book_of_Maccabees_I.1?version=english|Brenton's_Septuagint&version=source")
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(len(data["versions"]), 1)
        self.assertEqual(data['warnings'][0]['source']['warning_code'], APIWarningCode.APINoSourceText.value)
        self.assertEqual(data['warnings'][0]['source']['message'], 'We do not have the source text for The Book of Maccabees I 1')

    def test_api_get_text_no_translation(self):
        response = c.get("/api/v3/texts/Shuvi_Shuvi_HaShulamit?version=translation")
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(len(data["versions"]), 0)
        self.assertEqual(data['warnings'][0]['translation']['warning_code'], APIWarningCode.APINoTranslationText.value)
        self.assertEqual(data['warnings'][0]['translation']['message'], 'We do not have a translation for Shuvi Shuvi HaShulamit')

    def test_api_get_text_no_language(self):
        response = c.get("/api/v3/texts/The_Book_of_Maccabees_I.1?version=english|Brenton's_Septuagint&version=sgrg|all")
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(len(data["versions"]), 1)
        self.assertEqual(data['warnings'][0]['sgrg|all']['warning_code'], APIWarningCode.APINoLanguageVersion.value)
        self.assertEqual(data['warnings'][0]['sgrg|all']['message'],
                         "We do not have the language you asked for The Book of Maccabees I 1. Available languages are ['english', 'hebrew']")

    def test_api_get_text_no_version(self):
        response = c.get("/api/v3/texts/The_Book_of_Maccabees_I.1?version=english|Brenton's_Septuagint&version=hebrew|Kishkoosh")
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(len(data["versions"]), 1)
        self.assertEqual(data['warnings'][0]['hebrew|Kishkoosh']['warning_code'], APIWarningCode.APINoVersion.value)
        self.assertEqual(data['warnings'][0]['hebrew|Kishkoosh']['message'],
                         'We do not have version named Kishkoosh with language hebrew for The Book of Maccabees I 1')

    def test_fill_in_missing_segments(self):
        vtitle = "Maimonides' Mishneh Torah, edited by Philip Birnbaum, New York, 1967"
        response = c.get(f"/api/v3/texts/Mishneh_Torah,_Sabbath_1?version=english|{vtitle}&fill_in_missing_segments=true")
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertGreater(len(data['versions'][0]['text']), 2)
        self.assertTrue(data['versions'][0].get('sources'))
        self.assertEqual(data['versions'][0]['sources'][0], vtitle)
        self.assertNotEqual(data['versions'][0]['sources'][2], vtitle)

    def test_without_fill_in_missing_segments(self):
        vtitle = "Maimonides' Mishneh Torah, edited by Philip Birnbaum, New York, 1967"
        response = c.get(f"/api/v3/texts/Mishneh_Torah,_Sabbath_1?version=english|{vtitle}")
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(len(data['versions'][0]['text']), 2)
        self.assertFalse(data['versions'][0].get('sources'))

    def test_wrap_all_entities(self):
        vtitle = "The Contemporary Torah, Jewish Publication Society, 2006"
        response = c.get(f"/api/v3/texts/Genesis%2010?version=english|{vtitle}&return_format=wrap_all_entities")
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertIn('<a class ="refLink"', data['versions'][0]['text'][3])
        self.assertIn('<a href="/topics', data['versions'][0]['text'][8])

    def test_text_only(self):
        response = c.get("/api/v3/texts/Shulchan_Arukh%2C_Orach_Chayim.1:1?return_format=text_only")
        self.assertEqual(200, response.status_code)
        data = json.loads(response.content)
        self.assertNotIn('<', data['versions'][0]['text'])

    def test_error_return_format(self):
        response = c.get("/api/v3/texts/Shulchan_Arukh%2C_Orach_Chayim.1:1?return_format=not_valid")
        self.assertEqual(400, response.status_code)
        data = json.loads(response.content)
        self.assertEqual(data['error'], "return_format should be one of those formats: ['default', 'wrap_all_entities', 'text_only'].")
58 changes: 58 additions & 0 deletions api/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from sefaria.model import *
from sefaria.model.text_manager import TextManager
from sefaria.client.util import jsonResponse
from django.views import View
from .api_warnings import *


class Text(View):
    """
    View for the texts API: returns the requested versions of a single ref.

    ``dispatch`` resolves the ``tref`` URL keyword argument into ``self.oref``
    before the HTTP-method handler runs; ``get`` reads the query parameters
    ``version`` (repeatable, ``language|version_title``),
    ``fill_in_missing_segments`` and ``return_format``.
    """

    RETURN_FORMATS = ['default', 'wrap_all_entities', 'text_only']
    # Query-string spellings of a boolean flag that mean "off". Any other
    # non-empty value keeps meaning "on", as it did before this set existed.
    _FALSE_STRINGS = frozenset({'', '0', 'false'})

    def dispatch(self, request, *args, **kwargs):
        """
        Resolve ``kwargs['tref']`` into ``self.oref``, then dispatch as usual.

        Any exception raised while resolving the ref is reported to the client
        as a 400 JSON response carrying the exception's message.
        """
        try:
            self.oref = Ref.instantiate_ref_with_legacy_parse_fallback(kwargs['tref'])
        except Exception as e:
            # Deliberately broad: every failure to resolve the ref is a client error here.
            return jsonResponse({'error': getattr(e, 'message', str(e))}, status=400)
        return super().dispatch(request, *args, **kwargs)

    @staticmethod
    def split_piped_params(params_string) -> List[str]:
        """
        Split a ``language|version_title`` parameter into a two-item list.

        Only the first ``|`` separates the parts, so the result always has
        exactly two elements even if the title itself contains a pipe. A
        missing title yields ``''``. Underscores in the title are replaced by
        spaces.
        """
        lang, _, vtitle = params_string.partition('|')
        return [lang, vtitle.replace('_', ' ')]

    def _handle_warnings(self, data):
        """
        Replace the ``missings``/``available_langs`` entries of ``data`` with ``warnings``.

        ``data['missings']` is iterated as ``(lang, vtitle)`` pairs; each pair
        becomes a one-item dict mapping the requested parameter string to the
        warning's ``get_message()`` payload. ``data`` is modified in place and
        also returned.
        """
        data['warnings'] = []
        for lang, vtitle in data['missings']:
            if lang == 'source':
                warning = APINoSourceText(self.oref)
            elif lang == 'translation':
                warning = APINoTranslationText(self.oref)
            elif vtitle and vtitle != 'all':
                warning = APINoVersion(self.oref, vtitle, lang)
            else:
                warning = APINoLanguageVersion(self.oref, data['available_langs'])
            # Key the warning by the parameter as the client wrote it.
            representing_string = f'{lang}|{vtitle}' if vtitle else lang
            data['warnings'].append({representing_string: warning.get_message()})
        # These two keys are only used to build the warnings above.
        data.pop('missings')
        data.pop('available_langs')
        return data

    def get(self, request, *args, **kwargs):
        """
        Return the requested versions of ``self.oref`` as a JSON response.

        Responds with 400 when the ref is empty (and its node is not virtual)
        or when ``return_format`` is not one of ``RETURN_FORMATS``. Without
        any ``version`` parameter, ``primary`` is requested.
        """
        if self.oref.is_empty() and not self.oref.index_node.is_virtual:
            return jsonResponse({'error': f'We have no text for {self.oref}.'}, status=400)
        versions_params = request.GET.getlist('version', [])
        if not versions_params:
            versions_params = ['primary']
        versions_params = [self.split_piped_params(param_str) for param_str in versions_params]
        # Query values are strings, so "false" and "0" would be truthy if used
        # directly; map the "off" spellings to False explicitly.
        # NOTE(review): assumes TextManager only checks this flag's truthiness - confirm.
        raw_fill_in = request.GET.get('fill_in_missing_segments', '')
        fill_in_missing_segments = raw_fill_in.strip().lower() not in self._FALSE_STRINGS
        return_format = request.GET.get('return_format', 'default')
        if return_format not in self.RETURN_FORMATS:
            return jsonResponse({'error': f'return_format should be one of those formats: {self.RETURN_FORMATS}.'}, status=400)
        text_manager = TextManager(self.oref, versions_params, fill_in_missing_segments, return_format)
        data = text_manager.get_versions_for_query()
        data = self._handle_warnings(data)
        return jsonResponse(data)
32 changes: 32 additions & 0 deletions build/ci/createJobFromRollout.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
#
# Create a Kubernetes Job that runs pytest, using the pod spec of the
# "<DEPLOY_ENV>-web" rollout as the Job's pod template, and apply it.
#
# Usage: createJobFromRollout.sh <GITHUB_RUN_ID> <DEPLOY_ENV>
#
# Writes job.yaml and spec.yaml into the current working directory.

# Stop at the first failing command (including inside pipelines) so that a
# failed "kubectl get" or "yq" step never leads to applying a broken manifest.
set -euo pipefail

if [ "$#" -lt 2 ]; then
  echo "Usage: $0 <GITHUB_RUN_ID> <DEPLOY_ENV>" >&2
  exit 1
fi

GITHUB_RUN_ID=$1
DEPLOY_ENV=$2

# Job skeleton; .spec.template.spec is left empty here and filled in below.
cat << EOF > job.yaml
apiVersion: batch/v1
kind: Job
metadata:
  labels:
    ci-run: "${GITHUB_RUN_ID}"
    test-name: pytest
  name: $DEPLOY_ENV-pytest-sandbox-$GITHUB_RUN_ID
spec:
  backoffLimit: 2 # in waitForCIJob, we look for 2 fails before declaring failure. This could be made a variable.
  template:
    metadata:
      labels:
        ci-run: "${GITHUB_RUN_ID}"
        test-name: pytest
    spec:
EOF

# Copy the rollout's pod spec into the Job's pod template.
kubectl get rollout "${DEPLOY_ENV}-web" -o yaml | yq '.spec.template.spec' > spec.yaml
yq -i '.spec.template.spec += load("spec.yaml")' job.yaml
yq -i '.spec.template.spec.restartPolicy = "Never"' job.yaml
# Replace the first container's args with the pytest run. The single quotes keep
# "$?" from being expanded by this script; it is passed through as written.
yq -i '.spec.template.spec.containers[0].args = ["-c", "pip3 install pytest-django; pytest -v -m \"not deep and not failing\" ./sefaria; echo $? > /dev/stdout; exit 0;"]' job.yaml
# Remove the probes copied over from the rollout's first container.
yq -i 'del(.spec.template.spec.containers[0].startupProbe)' job.yaml
yq -i 'del(.spec.template.spec.containers[0].livenessProbe)' job.yaml
yq -i 'del(.spec.template.spec.containers[0].readinessProbe)' job.yaml

kubectl apply -f job.yaml
2 changes: 1 addition & 1 deletion build/ci/integration-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ localSettings:
DEBUG: true
DOMAIN_LANGUAGE: {}
APSCHEDULER_NAME: "apscheduler-{{ .Values.deployEnv }}"
SEARCH_ADMIN: "http://elasticsearch-data:9200"
SEARCH_URL: "http://elasticsearch-data:9200"
TURN_SERVER: ''
USE_CLOUDFLARE: false
FRONT_END_URL: "http://${NAME}.integration.sefaria.org"
Expand Down
Loading

0 comments on commit bbb7dad

Please sign in to comment.