
Commit

fix doc
andreiionutdamian committed Mar 20, 2024
1 parent 236c5ec commit 3aff885
Showing 4 changed files with 16 additions and 9 deletions.
TODO.md: 7 changes (5 additions & 2 deletions)
@@ -19,10 +19,13 @@ model = AutoModelForSequenceClassification.from_pretrained(model_name, cache_dir
### Model loading in serving_app.py

Change requests:
- move model loading into a separate mixin (separation of concerns)
- use an ultra-low footprint for the monitor & use base_th_llm_fastapi for serving
- add a "lambda" model for JSON model load (identity function: f(x)=x); see the sketch after this list
- create non-GPU serving for the stg deployment
- add CPU/GPU info to the serving result
- add label conversion for the monitor (if labels are given)
- add predict/text with a POST request and multiple inputs -> result has multiple outputs
- add predict time to the output (in ms)
- refactor: short lines & named params for multi-param calls
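A minimal sketch of the "lambda" model request above; the class name and shape are illustrative assumptions, not code from this repository:

```python
# Hypothetical "lambda" model: predict is the identity function f(x) = x,
# so the JSON model-load path can be exercised without real weights.
class LambdaModel:
  def predict(self, x):
    return x  # f(x) = x

# usage: behaves like any other loaded model in the serving path
model = LambdaModel()
assert model.predict({"text": "hello"}) == {"text": "hello"}
```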

1. get `model_name`, `model_type` and `model_cache` from redis
2. check whether the `model_cache` folder contains a subfolder matching `model_name` (this may fail)
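A hedged sketch of steps 1-2 above; the redis key layout and client setup are assumptions for illustration, not taken from this commit:

```python
import os
import redis

r = redis.Redis(host="localhost", port=6379, decode_responses=True)

# 1. get model_name, model_type and model_cache from redis
model_name = r.hget("models", "model_name")
model_type = r.hget("models", "model_type")
model_cache = r.hget("models", "model_cache")

# 2. check that the model_cache folder has a subfolder containing model_name
subfolders = os.listdir(model_cache) if os.path.isdir(model_cache) else []
if not any(model_name in sub for sub in subfolders):
  raise RuntimeError(f"{model_name} not cached under {model_cache}")
```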
app/Dockerfile_monitor: 5 changes (2 additions & 3 deletions)
@@ -1,9 +1,8 @@
 FROM aidamian/base_fastapi:latest
 WORKDIR /test_app
 
-RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-RUN pip install transformers scipy
-RUN pip install psycopg2-binary redis
+RUN pip3 install torch --index-url https://download.pytorch.org/whl/cpu
+RUN pip install --no-cache transformers scipy psycopg2-binary redis
 
 # basic stuff
 ADD src/mixins ./mixins/
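The consolidated RUN lines pin torch to the CPU-only wheel index; a quick sanity check inside the built image, as a sketch:

```python
import torch

# the CPU-only wheel reports a "+cpu" build and no CUDA support
print(torch.__version__)          # e.g. "2.2.1+cpu"
print(torch.cuda.is_available())  # expected: False
```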
app/src/mixins/llm_mixin.py: 4 changes (2 additions & 2 deletions)
@@ -11,14 +11,14 @@ def __init__(self):
     self.cache_root = os.getenv('CACHE_ROOT', '.cache')
     return
 
-  def load_model(self, model_type: str, model_name: str, retunpipe=False):
+  def load_model(self, model_type: str, model_name: str, returnpipe=False):
     model_cache=f"{self.cache_root}/{model_name}"
     result = None
     try:
       if model_type == "text":
         tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=model_cache)
         model = AutoModelForSequenceClassification.from_pretrained(model_name, cache_dir=model_cache, trust_remote_code=True)
-        result = pipeline("text-classification", model=model, tokenizer=tokenizer) if retunpipe else model
+        result = pipeline("text-classification", model=model, tokenizer=tokenizer) if returnpipe else model
       elif model_type == "image":
         image_processor = AutoImageProcessor.from_pretrained(model_name, cache_dir=model_cache)
         result = image_processor
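A hypothetical call to the corrected load_model signature; the host object and model name are illustrative assumptions:

```python
mixin = LlmMixin()  # assumed host class exposing load_model

# returnpipe=True returns a ready-to-call text-classification pipeline
pipe = mixin.load_model(
  model_type="text",
  model_name="distilbert-base-uncased-finetuned-sst-2-english",
  returnpipe=True,
)
print(pipe("the deployment went smoothly"))  # [{'label': ..., 'score': ...}]
```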
app/src/monitor/monitor_app.py: 9 changes (7 additions & 2 deletions)
@@ -104,7 +104,10 @@ def get_latest_model(self, model_type: str):

   def get_latest_model_top(self, model_type: str):
     latest = None
-    models = self.postgres_select_data_ordered("models","model_date", "desc", 1, model_type=model_type)
+    models = self.postgres_select_data_ordered(
+      table_name="models",order_by="model_date", order="desc",
+      maxcount=1, model_type=model_type
+    )
     if models:
       latest = models[0]
     #endif
@@ -127,7 +130,9 @@ def maybe_init_models(self):
     #iterate model types
     for model_type in model_types:
       latest = self.get_latest_model_top(model_type[0])
-      if self.load_model(model_type[0], latest[3], False):
+      model = self.load_model(model_type[0], latest[3], False)
+      model_exists = model is not None
+      if model_exists:
         self.redis_sethash("models",model_type[0], latest[3])
         self.P(f"Cache update: {model_type[0]} - {latest[3]}")
       #endif
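This refactor matches the TODO item "short lines & named params for multi-param calls"; a sketch of why it helps, with an assumed signature for illustration:

```python
def postgres_select_data_ordered(table_name, order_by, order, maxcount, **filters):
  ...  # assumed signature, for illustration only

# before: the reader must recall what "desc" and 1 mean positionally
rows = postgres_select_data_ordered("models", "model_date", "desc", 1, model_type="text")

# after: every argument is self-describing and lines stay short
rows = postgres_select_data_ordered(
  table_name="models", order_by="model_date", order="desc",
  maxcount=1, model_type="text",
)
```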
