
chore: move clip models #945

Open · wants to merge 16 commits into main
2 changes: 1 addition & 1 deletion client/setup.py
@@ -43,7 +43,7 @@
setup_requires=['setuptools>=18.0', 'wheel'],
install_requires=[
'jina>=3.12.0',
'docarray[common]>=0.19.0,<0.30.0',
'docarray[common]==0.21.0',
Member commented:
This requirement is too strict. Could we use a more flexible version range here?

'packaging',
],
extras_require={
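Per the review comment above, a minimal sketch of what a looser pin could look like; the exact bounds are an assumption (floor at the release this PR was tested against, ceiling kept at the previous <0.30.0 limit) and would need to be confirmed against the docarray versions the client actually supports:

# hypothetical alternative to the hard '==0.21.0' pin in client/setup.py
install_requires = [
    'jina>=3.12.0',
    'docarray[common]>=0.21.0,<0.30.0',  # allow newer 0.2x releases, keep the old upper bound
    'packaging',
]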
27 changes: 21 additions & 6 deletions server/clip_server/model/clip_onnx.py
@@ -1,5 +1,6 @@
import os
from typing import Dict, Optional
import requests

from clip_server.model.pretrained_models import (
download_model,
@@ -8,10 +9,7 @@
)
from clip_server.model.clip_model import BaseCLIPModel

_S3_BUCKET = (
'https://clip-as-service.s3.us-east-2.amazonaws.com/models/onnx/' # Deprecated
)
_S3_BUCKET_V2 = 'https://clip-as-service.s3.us-east-2.amazonaws.com/models-436c69702d61732d53657276696365/onnx/'
_HUGGINGFACE_ONNX_BUCKET = 'https://huggingface.co/jinaai/clip-models/'
_MODELS = {
'RN50::openai': (
('RN50/textual.onnx', '722418bfe47a1f5c79d1f44884bb3103'),
@@ -213,14 +211,14 @@
)
textual_model_name, textual_model_md5 = _MODELS[name][0]
self._textual_path = download_model(
url=_S3_BUCKET_V2 + textual_model_name,
url=self.get_onnx_model_url(name=textual_model_name),
target_folder=cache_dir,
md5sum=textual_model_md5,
with_resume=True,
)
visual_model_name, visual_model_md5 = _MODELS[name][1]
self._visual_path = download_model(
url=_S3_BUCKET_V2 + visual_model_name,
url=self.get_onnx_model_url(name=visual_model_name),
target_folder=cache_dir,
md5sum=visual_model_md5,
with_resume=True,
@@ -261,6 +259,23 @@

return name

@staticmethod
def get_onnx_model_url(name: str):
    # Map a model entry such as 'RN50/textual.onnx' to its Hugging Face download URL.
    hf_download_url = (
        _HUGGINGFACE_ONNX_BUCKET
        + 'resolve/main/'
        + name.split('/')[0]
        + '-'
        + name.split('/')[1]
        + '?download=true'
    )
    try:
        # A HEAD request only verifies the file is reachable; 302 covers the CDN redirect.
        response = requests.head(hf_download_url, timeout=10)
    except Exception:
        raise ValueError(f'Failed to reach model url: {hf_download_url}')
    if response.status_code in (200, 302):
        return hf_download_url
    # Fail loudly instead of silently returning None on an unexpected status code.
    raise ValueError(f'Invalid model url: {hf_download_url}')
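As a sanity check on the concatenation above, this is what the helper builds for the 'RN50/textual.onnx' entry in _MODELS (a worked example only, not part of the change):

name = 'RN50/textual.onnx'
bucket = 'https://huggingface.co/jinaai/clip-models/'
url = bucket + 'resolve/main/' + name.split('/')[0] + '-' + name.split('/')[1] + '?download=true'
# -> 'https://huggingface.co/jinaai/clip-models/resolve/main/RN50-textual.onnx?download=true'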

def start_sessions(
self,
dtype,
35 changes: 25 additions & 10 deletions server/clip_server/model/pretrained_models.py
@@ -2,9 +2,10 @@
import hashlib
import shutil
import urllib
import requests


_OPENCLIP_S3_BUCKET = 'https://clip-as-service.s3.us-east-2.amazonaws.com/models/torch'
_OPENCLIP_HUGGINGFACE_BUCKET = 'https://huggingface.co/jinaai/clip-models/'
_OPENCLIP_MODELS = {
'RN50::openai': ('RN50.pt', '9140964eaaf9f68c95aa8df6ca13777c'),
'RN50::yfcc15m': ('RN50-yfcc15m.pt', 'e9c564f91ae7dc754d9043fdcd2a9f22'),
@@ -132,7 +133,7 @@
def md5file(filename: str):
hash_md5 = hashlib.md5()
with open(filename, 'rb') as f:
for chunk in iter(lambda: f.read(4096), b""):
for chunk in iter(lambda: f.read(4096), b''):
hash_md5.update(chunk)

return hash_md5.hexdigest()
@@ -143,18 +144,32 @@
if len(model_pretrained) == 0: # not on s3
return None, None
else:
return (_OPENCLIP_S3_BUCKET + '/' + model_pretrained[0], model_pretrained[1])
hf_download_url = (
    _OPENCLIP_HUGGINGFACE_BUCKET
    + 'resolve/main/'
    + model_pretrained[0]
    + '?download=true'
)
try:
    # A HEAD request only verifies the file is reachable; 302 covers the CDN redirect.
    response = requests.head(hf_download_url, timeout=10)
except Exception:
    raise ValueError(f'Failed to reach model url: {hf_download_url}')
if response.status_code in (200, 302):
    return (hf_download_url, model_pretrained[1])
# Fail loudly instead of silently returning None on an unexpected status code.
raise ValueError(f'Invalid model url: {hf_download_url}')
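# Worked example (illustrative): for the 'RN50::openai' entry above,
# model_pretrained == ('RN50.pt', '9140964eaaf9f68c95aa8df6ca13777c'),
# so the returned tuple would be:
# ('https://huggingface.co/jinaai/clip-models/resolve/main/RN50.pt?download=true',
#  '9140964eaaf9f68c95aa8df6ca13777c')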



def download_model(
url: str,
target_folder: str = os.path.expanduser("~/.cache/clip"),
target_folder: str = os.path.expanduser('~/.cache/clip'),
md5sum: str = None,
with_resume: bool = True,
max_attempts: int = 3,
) -> str:
os.makedirs(target_folder, exist_ok=True)
filename = os.path.basename(url)
filename = filename.split('?')[0]
if filename.split('.')[-1] == 'onnx':
filename = filename.split('-')[-1]
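# Worked example (illustrative) of the normalization above for a Hugging Face ONNX URL:
#   basename('.../resolve/main/RN50-textual.onnx?download=true') -> 'RN50-textual.onnx?download=true'
#   split('?')[0]  -> 'RN50-textual.onnx'
#   ends in 'onnx', so split('-')[-1] -> 'textual.onnx' (same cache filename as the old per-model S3 paths)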

download_target = os.path.join(target_folder, filename)

@@ -175,14 +190,14 @@
)

progress = Progress(
" \n", # divide this bar from Flow's bar
TextColumn("[bold blue]{task.fields[filename]}", justify="right"),
"[progress.percentage]{task.percentage:>3.1f}%",
"•",
' \n', # divide this bar from Flow's bar
TextColumn('[bold blue]{task.fields[filename]}', justify='right'),
'[progress.percentage]{task.percentage:>3.1f}%',
'•',
DownloadColumn(),
"•",
'•',
TransferSpeedColumn(),
"•",
'•',
TimeRemainingColumn(),
)

2 changes: 1 addition & 1 deletion server/clip_server/torch-flow.yml
@@ -8,6 +8,6 @@ executors:
jtype: CLIPEncoder
metas:
py_modules:
- clip_server.executors.clip_torch
- clip_server.executors.clip_onnx
timeout_ready: 3000000
replicas: 1