
Commit

fix doc
andreiionutdamian committed Mar 20, 2024
1 parent 236c5ec commit 3aff885
Showing 4 changed files with 16 additions and 9 deletions.
TODO.md: 7 changes (5 additions & 2 deletions)
@@ -19,10 +19,13 @@ model = AutoModelForSequenceClassification.from_pretrained(model_name, cache_dir
### Model loading in serving_app.py

Change requests:
- move model loading into a separate mixin (separation of concerns)
- use an ultra-low footprint for the monitor & use base_th_llm_fastapi for serving
- add a "lambda" model for JSON model load (identity function: f(x)=x); see the sketch after this list
- create non-GPU serving for the stg deployment
- add CPU/GPU info to the serving result
- add label conversion for the monitor (if labels are given)
- add predict/text with a POST request and multiple inputs -> result has multiple outputs
- add predict time to the output (in ms)
- refactor: short lines & named params for multi-param calls
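A minimal sketch of the "lambda" model request above; the class name and shape are illustrative assumptions, not code from this repository:

```python
# Hypothetical "lambda" model: predict is the identity function f(x) = x,
# so the JSON model-load path can be exercised without real weights.
class LambdaModel:
  def predict(self, x):
    return x  # f(x) = x

# usage: behaves like any other loaded model in the serving path
model = LambdaModel()
assert model.predict({"text": "hello"}) == {"text": "hello"}
```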

1. get `model_name`, `model_type` and `model_cache` from redis
2. check whether the `model_cache` folder contains a subfolder matching `model_name` (this may fail)
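A hedged sketch of steps 1-2 above; the redis key layout and client setup are assumptions for illustration, not taken from this commit:

```python
import os
import redis

r = redis.Redis(host="localhost", port=6379, decode_responses=True)

# 1. get model_name, model_type and model_cache from redis
model_name = r.hget("models", "model_name")
model_type = r.hget("models", "model_type")
model_cache = r.hget("models", "model_cache")

# 2. check that the model_cache folder has a subfolder containing model_name
subfolders = os.listdir(model_cache) if os.path.isdir(model_cache) else []
if not any(model_name in sub for sub in subfolders):
  raise RuntimeError(f"{model_name} not cached under {model_cache}")
```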
app/Dockerfile_monitor: 5 changes (2 additions & 3 deletions)
@@ -1,9 +1,8 @@
 FROM aidamian/base_fastapi:latest
 WORKDIR /test_app
 
-RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-RUN pip install transformers scipy
-RUN pip install psycopg2-binary redis
+RUN pip3 install torch --index-url https://download.pytorch.org/whl/cpu
+RUN pip install --no-cache transformers scipy psycopg2-binary redis
 
 # basic stuff
 ADD src/mixins ./mixins/
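The consolidated RUN lines pin torch to the CPU-only wheel index; a quick sanity check inside the built image, as a sketch:

```python
import torch

# the CPU-only wheel reports a "+cpu" build and no CUDA support
print(torch.__version__)          # e.g. "2.2.1+cpu"
print(torch.cuda.is_available())  # expected: False
```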
app/src/mixins/llm_mixin.py: 4 changes (2 additions & 2 deletions)
@@ -11,14 +11,14 @@ def __init__(self):
     self.cache_root = os.getenv('CACHE_ROOT', '.cache')
     return
 
-  def load_model(self, model_type: str, model_name: str, retunpipe=False):
+  def load_model(self, model_type: str, model_name: str, returnpipe=False):
     model_cache=f"{self.cache_root}/{model_name}"
     result = None
     try:
       if model_type == "text":
         tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=model_cache)
         model = AutoModelForSequenceClassification.from_pretrained(model_name, cache_dir=model_cache, trust_remote_code=True)
-        result = pipeline("text-classification", model=model, tokenizer=tokenizer) if retunpipe else model
+        result = pipeline("text-classification", model=model, tokenizer=tokenizer) if returnpipe else model
       elif model_type == "image":
         image_processor = AutoImageProcessor.from_pretrained(model_name, cache_dir=model_cache)
         result = image_processor
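A hypothetical call to the corrected load_model signature; the host object and model name are illustrative assumptions:

```python
mixin = LlmMixin()  # assumed host class exposing load_model

# returnpipe=True returns a ready-to-call text-classification pipeline
pipe = mixin.load_model(
  model_type="text",
  model_name="distilbert-base-uncased-finetuned-sst-2-english",
  returnpipe=True,
)
print(pipe("the deployment went smoothly"))  # [{'label': ..., 'score': ...}]
```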
app/src/monitor/monitor_app.py: 9 changes (7 additions & 2 deletions)
@@ -104,7 +104,10 @@ def get_latest_model(self, model_type: str):

   def get_latest_model_top(self, model_type: str):
     latest = None
-    models = self.postgres_select_data_ordered("models","model_date", "desc", 1, model_type=model_type)
+    models = self.postgres_select_data_ordered(
+      table_name="models",order_by="model_date", order="desc",
+      maxcount=1, model_type=model_type
+    )
     if models:
       latest = models[0]
     #endif
@@ -127,7 +130,9 @@ def maybe_init_models(self):
     #iterate model types
     for model_type in model_types:
       latest = self.get_latest_model_top(model_type[0])
-      if self.load_model(model_type[0], latest[3], False):
+      model = self.load_model(model_type[0], latest[3], False)
+      model_exists = model is not None
+      if model_exists:
         self.redis_sethash("models",model_type[0], latest[3])
         self.P(f"Cache update: {model_type[0]} - {latest[3]}")
       #endif
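This refactor matches the TODO item "short lines & named params for multi-param calls"; a sketch of why it helps, with an assumed signature for illustration:

```python
def postgres_select_data_ordered(table_name, order_by, order, maxcount, **filters):
  ...  # assumed signature, for illustration only

# before: the reader must recall what "desc" and 1 mean positionally
rows = postgres_select_data_ordered("models", "model_date", "desc", 1, model_type="text")

# after: every argument is self-describing and lines stay short
rows = postgres_select_data_ordered(
  table_name="models", order_by="model_date", order="desc",
  maxcount=1, model_type="text",
)
```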
