Skip to content

Commit

Permalink
Merge pull request #1809 from Sefaria/topic-prompt-generator
Browse files Browse the repository at this point in the history
Topic prompt generator
  • Loading branch information
nsantacruz committed Mar 31, 2024
2 parents cb986ae + f191d04 commit f099595
Show file tree
Hide file tree
Showing 33 changed files with 1,190 additions and 94 deletions.
24 changes: 24 additions & 0 deletions build/ci/production-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,30 @@ varnish:
ref: varnish-secret-production
monitoring:
enabled: true
# Celery task worker settings for production (overrides the chart defaults).
tasks:
enabled: true
resources:
requests:
memory: "1Gi"
cpu: "500m"
limits:
memory: "6Gi"
cpu: "1000m"
replicaCount: 1
redis:
# url is left empty and sentinelURL is set below; presumably production reaches Redis through Sentinel — confirm.
url: ""
# NOTE(review): Redis Sentinel conventionally listens on 26379; confirm that 26397 is intentional and not a transposition.
port: "26397"
# Rendered into CELERY_REDIS_BROKER_DB_NUM / CELERY_REDIS_RESULT_BACKEND_DB_NUM by the local-settings ConfigMap.
brokerDBNumber: 2
resultBackendDBNumber: 3
sentinelURL: "redis-headless.redis.svc.cluster.local"
# Serialized to JSON and exposed as SENTINEL_TRANSPORT_OPTS.
transportOptions:
master: mymaster
# Names of existing secrets; the web and tasks pods load them through envFrom secretRef.
redisPassword:
ref: redis-password
sentinelPassword:
ref: sentinel-password
# Extra queue names; merged with the chart's internal "tasks" queue and exposed as CELERY_QUEUES.
queues:
llm: llm-llm
ingress:
hosts:
- host: "sefaria.org"
Expand Down
10 changes: 10 additions & 0 deletions helm-chart/sefaria-project/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -175,3 +175,13 @@ preferredDuringSchedulingIgnoredDuringExecution:
- "true"
{{- end }}
{{- end }}

{{/*
Setup complete tasks queue info.

sefaria.tasks.internalQueues renders a YAML map of the queues the chart itself
defines (currently a single "tasks" entry named after the deploy environment).

sefaria.tasks.queues merges that internal map with .Values.tasks.queues and
renders the result as YAML; callers parse it back with fromYaml.

NOTE(review): Sprig's merge gives precedence to its first argument, so an
internal queue should win over a same-named key in values -- confirm this is
the intended priority.
*/}}
{{- define "sefaria.tasks.internalQueues" }}
tasks: {{ .Values.deployEnv }}-tasks
{{- end }}
{{- define "sefaria.tasks.queues" }}
{{- merge (fromYaml (include "sefaria.tasks.internalQueues" . )) .Values.tasks.queues | toYaml }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,18 @@ data:
'ROTATE_REFRESH_TOKENS': True,
'SIGNING_KEY': os.getenv("SIMPLE_JWT_SIGNING_KEY"),
}
# Celery
# Either define SENTINEL_HEADLESS_URL if using sentinel or REDIS_URL for a simple redis instance.
# Plain os.getenv values are strings, or None when the variable is unset.
REDIS_URL = os.getenv("REDIS_URL")
REDIS_PORT = os.getenv("REDIS_PORT")
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD")
CELERY_REDIS_BROKER_DB_NUM = os.getenv("CELERY_REDIS_BROKER_DB_NUM")
CELERY_REDIS_RESULT_BACKEND_DB_NUM = os.getenv("CELERY_REDIS_RESULT_BACKEND_DB_NUM")
# JSON-encoded settings: `or "{}"` covers both an unset variable and one that is
# set but empty, which json.loads would otherwise reject at settings import time.
CELERY_QUEUES = json.loads(os.getenv("CELERY_QUEUES") or "{}")
SENTINEL_HEADLESS_URL = os.getenv("SENTINEL_HEADLESS_URL")
SENTINEL_TRANSPORT_OPTS = json.loads(os.getenv("SENTINEL_TRANSPORT_OPTS") or "{}")
SENTINEL_PASSWORD = os.getenv("SENTINEL_PASSWORD")
MOBILE_APP_KEY = os.getenv("MOBILE_APP_KEY")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,12 @@ data:
SENTRY_CODE_VERSION: {{ .Values.web.containerImage.tag }}
FAIL_GRACEFULLY: "{{ .Values.localSettings.FAIL_GRACEFULLY }}"
SEARCH_HOST: {{ .Values.nginx.SEARCH_HOST | quote }}
{{- if .Values.tasks.enabled }}
# Celery/Redis connection settings; local_settings.py reads these with os.getenv.
# Every value is quoted so that an empty or numeric-looking setting is still
# rendered as a string rather than as YAML null or a number.
REDIS_URL: {{ .Values.tasks.redis.url | quote }}
REDIS_PORT: {{ .Values.tasks.redis.port | quote }}
CELERY_REDIS_BROKER_DB_NUM: {{ .Values.tasks.redis.brokerDBNumber | quote }}
CELERY_REDIS_RESULT_BACKEND_DB_NUM: {{ .Values.tasks.redis.resultBackendDBNumber | quote }}
CELERY_QUEUES: {{ fromYaml (include "sefaria.tasks.queues" .) | toJson | quote }}
SENTINEL_HEADLESS_URL: {{ .Values.tasks.redis.sentinelURL | quote }}
SENTINEL_TRANSPORT_OPTS: {{ .Values.tasks.redis.transportOptions | toJson | quote }}
{{- end }}
153 changes: 153 additions & 0 deletions helm-chart/sefaria-project/templates/rollout/task.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
{{- if .Values.tasks.enabled }}
{{- $QueueConfig := (fromYaml (include "sefaria.tasks.queues" . )) -}}
---
# apiVersion: apps/v1
# kind: Deployment
# Argo Rollout that runs the Celery worker ("tasks") for this environment.
# The whole manifest is rendered only when tasks.enabled is true.
apiVersion: argoproj.io/v1alpha1
kind: Rollout
metadata:
name: {{ .Values.deployEnv }}-tasks
labels:
deployEnv: "{{ .Values.deployEnv }}"
app: tasks-{{ .Values.deployEnv }}
# releaseRevision: "{{ .Release.Revision }}"
annotations:
rollout.argoproj.io/revision: "{{ .Release.Revision }}"
spec:
# Blue/green strategy with automatic promotion; the active Service is the task Service of this environment.
strategy:
blueGreen:
activeService: task-{{ .Values.deployEnv }}
autoPromotionEnabled: true
# NOTE(review): the pre-promotion analysis is given the nginx hostname of this revision, not the worker itself — confirm that is the intended readiness signal for a Celery worker.
prePromotionAnalysis:
templates:
# - templateName: rollout-readiness-{{ .Values.deployEnv }}
- templateName: rollout-ready-{{ .Values.deployEnv }}
args:
- name: healthcheck-hostname
value: "nginx-{{ .Values.deployEnv }}-{{ .Release.Revision }}"
{{- if .Values.deploymentMessage }}
postPromotionAnalysis:
templates:
- templateName: rollout-complete-{{ .Values.deployEnv }}
args:
- name: revision
value: "{{ .Release.Revision }}"
- name: deployment
value: {{ .Values.deployEnv }}
- name: version
value: {{ .Values.web.containerImage.tag }}
- name: chartVersion
value: {{ .Chart.Version }}
{{- end }}
selector:
matchLabels:
app: tasks-{{ .Values.deployEnv }}
# releaseRevision: "{{ .Release.Revision }}"
revisionHistoryLimit: 2
replicas: {{ .Values.tasks.replicaCount }}
progressDeadlineSeconds: 1200
template:
metadata:
labels:
app: tasks-{{ .Values.deployEnv }}
tier: application
deployEnv: "{{ .Values.deployEnv }}"
stackRole: celery
releaseRevision: "{{ .Release.Revision }}"
spec:
# Hard anti-affinity: a worker pod is never scheduled on a host that already runs
# a pod labelled app=mongo or another tasks pod of this environment.
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: app
operator: In
values:
- mongo
topologyKey: kubernetes.io/hostname
- labelSelector:
matchExpressions:
- key: app
operator: In
values:
- "tasks-{{ .Values.deployEnv }}"
topologyKey: kubernetes.io/hostname
nodeAffinity:
{{- include "sefaria.nodeAffinities" . | nindent 10 }}
containers:
# The worker reuses the web container image and starts Celery instead of the web server.
- name: tasks
image: "{{ .Values.web.containerImage.imageRegistry }}:{{ .Values.web.containerImage.tag }}"
imagePullPolicy: Always
command: ["celery"]
# The worker consumes only the internal "tasks" queue resolved from sefaria.tasks.queues.
args: ["-A", "sefaria.celery_setup.app","worker", "-Q", "{{ $QueueConfig.tasks }}", "-l", "INFO"]
env:
- name: ENV_NAME
value: "{{ .Values.deployEnv }}"
- name: STACK_COMPONENT
value: tasks
- name: REDIS_HOST
value: "redis-{{ .Values.deployEnv }}"
- name: NODEJS_HOST
value: "node-{{ .Values.deployEnv }}-{{ .Release.Revision }}"
- name: VARNISH_HOST
value: "varnish-{{ .Values.deployEnv }}-{{ .Release.Revision }}"
- name: HELM_REVISION
value: "{{ .Release.Revision }}"
envFrom:
# This inner guard is always true here: the whole template is already wrapped in the same condition.
{{- if .Values.tasks.enabled }}
- secretRef:
name: {{ .Values.tasks.redis.sentinelPassword.ref }}
- secretRef:
name: {{ .Values.tasks.redis.redisPassword.ref }}
{{- end }}
- secretRef:
name: {{ template "sefaria.secrets.elasticUser" . }}
- secretRef:
name: {{ .Values.secrets.localSettings.ref }}
optional: true
- configMapRef:
name: local-settings-{{ .Values.deployEnv }}
- secretRef:
name: local-settings-secrets-{{ .Values.deployEnv }}
optional: true
- configMapRef:
name: local-settings-web-{{ .Values.deployEnv }}
optional: true
- secretRef:
name: local-settings-web-secrets-{{ .Values.deployEnv }}
optional: true
resources: {{ toYaml .Values.tasks.resources | nindent 10 }}
volumeMounts:
# local_settings.py is mounted as a single read-only file from the local-settings-file ConfigMap.
- mountPath: /app/sefaria/local_settings.py
name: local-settings
subPath: local_settings.py
readOnly: true
- mountPath: /client-secret
name: client-secret
readOnly: true
- mountPath: /app/logging-secret.json
name: logging-secret
subPath: logging-secret.json
readOnly: true
- name: elastic-cert
mountPath: /etc/ssl/certs/elastic
readOnly: true
volumes:
- name: local-settings
configMap:
name: local-settings-file-{{ .Values.deployEnv }}
items:
- key: local_settings.py
path: local_settings.py
- name: elastic-cert
secret:
secretName: {{ template "sefaria.secrets.elasticCertificate" . }}
optional: true
- name: client-secret
secret:
secretName: {{ template "sefaria.secrets.googleClient" . }} # needs to be checked if it's a reference object or the data object we created.
- name: logging-secret
secret:
secretName: {{ template "sefaria.secrets.logging" . }}
optional: true
{{- end }}
6 changes: 6 additions & 0 deletions helm-chart/sefaria-project/templates/rollout/web.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ spec:
value: k8s.container.name=app,k8s.deployment.name={{ .Values.deployEnv }}-web,k8s.namespace.name={{ .Release.Namespace }},k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME)
{{- end }}
envFrom:
{{- if .Values.tasks.enabled }}
- secretRef:
name: {{ .Values.tasks.redis.sentinelPassword.ref }}
- secretRef:
name: {{ .Values.tasks.redis.redisPassword.ref }}
{{- end }}
- secretRef:
name: {{ template "sefaria.secrets.elasticUser" . }}
- secretRef:
Expand Down
13 changes: 13 additions & 0 deletions helm-chart/sefaria-project/templates/service/task.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{{- if .Values.tasks.enabled }}
# Service in front of the Celery worker pods. The tasks Rollout references it as
# its blueGreen activeService. It is rendered only when tasks are enabled, like
# the Rollout itself, so a chart with tasks disabled does not create a Service
# that selects no pods.
apiVersion: v1
kind: Service
metadata:
  name: task-{{ .Values.deployEnv }}
  labels:
    app: tasks-{{ .Values.deployEnv }}
    deployEnv: "{{ .Values.deployEnv }}"
    stackRole: task
spec:
  ports:
  - port: 80
  selector:
    app: "tasks-{{ .Values.deployEnv }}"
{{- end }}
34 changes: 33 additions & 1 deletion helm-chart/sefaria-project/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,30 @@ varnish:
image: ibmcom/varnish-metrics-exporter
tag: 0.32.0

# Celery task worker. Disabled by default; when enabled the chart renders the
# tasks Rollout and adds the Redis/Celery settings to the local-settings ConfigMap.
tasks:
enabled: false
resources:
requests:
memory: "100Mi"
cpu: "100m"
limits:
memory: "200Mi"
cpu: "400m"
replicaCount: 1
redis:
# Set either url (simple redis instance) or sentinelURL (redis sentinel).
url: "redis://127.0.0.1"
# NOTE(review): the standard Redis port is 6379; confirm that "6397" is intentional and not a transposition.
port: "6397"
# Redis database numbers used for the Celery broker and the result backend.
brokerDBNumber: "0"
resultBackendDBNumber: "1"
sentinelURL: ""
# Serialized to JSON and exposed as SENTINEL_TRANSPORT_OPTS.
transportOptions: {}
# Names of existing secrets; the web and tasks pods load them through envFrom secretRef.
redisPassword:
ref: redis-password
sentinelPassword:
ref: sentinel-password
# Extra queue names; merged with the chart's internal "tasks" queue and exposed as CELERY_QUEUES.
queues:
llm: default-llm

ingress:
# You can set path: serviceName: and port: for each host. By default they are
# set to '/*', 'nginx', '80' respectively for each. Do note that changing this
Expand Down Expand Up @@ -351,6 +375,15 @@ secrets:
# RECAPTCHA_PUBLIC_KEY:
# RECAPTCHA_PRIVATE_KEY:
# SIMPLE_JWT_SIGNING_KEY:
# REDIS_PORT:
# REDIS_PASSWORD:
# CELERY_REDIS_BROKER_DB_NUM:
# CELERY_REDIS_RESULT_BACKEND_DB_NUM:
# CELERY_QUEUES:
# SENTINEL_HEADLESS_URL:
# SENTINEL_TRANSPORT_OPTS:
# SENTINEL_PASSWORD:
# REDIS_URL:
# MOBILE_APP_KEY:
backupManager:
# If you're using a reference to an existing secret then the data: section
Expand Down Expand Up @@ -381,7 +414,6 @@ secrets:
ref: elastic-admin
# data:


# Settings for various cronjobs
cronJobs:
# Settings for regenerating long cached data
Expand Down
43 changes: 41 additions & 2 deletions reader/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3093,6 +3093,21 @@ def topics_list_api(request):
return response


@staff_member_required
def generate_topic_prompts_api(request, slug: str):
    """
    Staff-only API that kicks off prompt generation for sources linked to the topic `slug`.

    Only POST is accepted. The JSON body is expected to carry a 'ref_topic_links' entry;
    the links are split per language by get_ref_context_hints_by_lang and
    generate_and_save_topic_prompts is called once for each language.
    Responds with {"acknowledged": True} and HTTP 202 once all languages were handed off.
    """
    if request.method != "POST":
        return jsonResponse({"error": "This API only accepts POST requests."})

    # Imported lazily, only on the POST path.
    from sefaria.helper.llm.tasks import generate_and_save_topic_prompts
    from sefaria.helper.llm.topic_prompt import get_ref_context_hints_by_lang

    topic = Topic.init(slug)
    ref_topic_links = json.loads(request.body).get('ref_topic_links')
    hints_by_lang = get_ref_context_hints_by_lang(ref_topic_links)
    for lang, ref_hint_pairs in hints_by_lang.items():
        # Turn the sequence of (ref, context hint) pairs into two parallel tuples.
        orefs, context_hints = zip(*ref_hint_pairs)
        generate_and_save_topic_prompts(lang, topic, orefs, context_hints)
    return jsonResponse({"acknowledged": True}, status=202)


@staff_member_required
def add_new_topic_api(request):
if request.method == "POST":
Expand Down Expand Up @@ -3205,13 +3220,37 @@ def reorder_topics(request):
results.append(topic.contents())
return jsonResponse({"topics": results})

@staff_member_required
def topic_ref_bulk_api(request):
    """
    API to bulk edit RefTopicLinks.

    The request body is a JSON list of link objects. Each link supplies 'ref', 'toTopic' and a
    'descriptions' mapping keyed by interface language (the singular key 'description' is
    accepted as a fallback). Every (link, language) pair is passed to edit_topic_source and
    the collected results are returned as a JSON list. A link that carries no descriptions
    contributes nothing to the result.
    """
    topic_links = json.loads(request.body)
    all_links_touched = []
    for link in topic_links:
        tref = Ref(link.get('ref')).normal()
        slug = link.get("toTopic")
        # Topics that load as an AuthorTopic get the first link type listed under 'authors';
        # every other topic gets a plain 'about' link.
        linkType = _CAT_REF_LINK_TYPE_FILTER_MAP['authors'][0] if AuthorTopic.init(slug) else 'about'
        # Fall back to an empty mapping so a link without descriptions is skipped
        # instead of raising AttributeError on None.
        descriptions = link.get("descriptions", link.get("description")) or {}
        for language, description in descriptions.items():
            ref_topic_dict = edit_topic_source(slug, orig_tref=tref, new_tref=tref,
                                               linkType=linkType, description=description,
                                               interface_lang=language)
            all_links_touched.append(ref_topic_dict)
    return jsonResponse(all_links_touched)



@catch_error_as_json
def topic_ref_api(request, tref):
"""
API to get RefTopicLinks, as well as creating, editing, and deleting of RefTopicLinks
"""

data = request.GET if request.method in ["DELETE", "GET"] else json.loads(request.POST.get('json'))
try:
data = request.GET if request.method in ["DELETE", "GET"] else json.loads(request.POST.get('json'))
except Exception as e:
data = json.loads(request.body)
slug = data.get('topic')
interface_lang = 'en' if data.get('interface_lang') == 'english' else 'he'
tref = Ref(tref).normal() # normalize input
Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ djangorestframework_simplejwt==3.3.0
PyJWT==1.7.1 # pinned b/c current version 2.0.0 breaks simplejwt. waiting for 2.0.1
elasticsearch==8.8.2
git+https://github.com/Sefaria/elasticsearch-dsl-py@v8.0.0#egg=elasticsearch-dsl
git+https://github.com/Sefaria/LLM@v1.0.3#egg=sefaria_llm_interface&subdirectory=app/llm_interface
geojson==2.5.0
geopy==2.3.0
gevent==20.12.0; sys_platform != 'darwin'
Expand Down Expand Up @@ -65,7 +66,9 @@ babel
python-bidi
requests
Cerberus
celery[redis]
google-re2
dnspython~=2.5.0

#opentelemetry-distro
#opentelemetry-exporter-otlp
Expand Down
6 changes: 6 additions & 0 deletions sefaria/celery_setup/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from celery import Celery
from sefaria.celery_setup.config import generate_config_from_env

# The project's Celery application; its configuration is whatever
# generate_config_from_env() returns.
app = Celery('sefaria')
_env_config = generate_config_from_env()
app.conf.update(**_env_config)
# Look for task modules inside the LLM helper package.
app.autodiscover_tasks(packages=['sefaria.helper.llm'])
Loading

0 comments on commit f099595

Please sign in to comment.