Support execution of metrics on a remote host #568

Merged 125 commits on Mar 4, 2024
Changes from 105 commits

Commits
73bbc6b
adding service and remote metric
assaftibm Jan 15, 2024
d4eb457
new MetricPipeline metrics.rag.mrr
matanor Jan 18, 2024
3c7f0d2
adjust field names
matanor Jan 18, 2024
f7a5aba
update Perplexity implementation
matanor Jan 18, 2024
5118513
add metrics.rag.context_relevancy
matanor Jan 18, 2024
70d0d4c
fix init
matanor Jan 18, 2024
ce4c52f
a new tool for running metrics on a dataframe
matanor Jan 18, 2024
cc48004
adding more metrics to rag and to evaluate
assaftibm Jan 21, 2024
06ceade
fix answer relevance
assaftibm Jan 22, 2024
0745cee
fix instance score
assaftibm Jan 22, 2024
cfaafc5
update to relative imports, as needed within unitxt
matanor Jan 22, 2024
041e8da
flip the order such that the prediction (e.g. the retrieved context) …
matanor Jan 22, 2024
c43864a
add comments
matanor Jan 22, 2024
bd47f0e
rename to eval_utils.py
matanor Jan 22, 2024
7d81efc
add an import of eval_utils.py
matanor Jan 22, 2024
f005398
save reference scores in a list
matanor Jan 22, 2024
c1379d2
add expected reference_scores to perplexity.py expected outputs
matanor Jan 22, 2024
4d4281f
add context_perplexity
matanor Jan 22, 2024
6d81ee7
add context_perplexity
matanor Jan 22, 2024
9491aa7
add new evaluate_rag example
matanor Jan 22, 2024
cc4e34a
add import of eval_utils
matanor Jan 22, 2024
a7712b0
add comments explaining review questions
matanor Jan 23, 2024
7bf9206
add context_preplexity.json
matanor Jan 23, 2024
df0985a
fix context perplexity
assaftibm Jan 23, 2024
a09caf5
service
assaftibm Jan 25, 2024
d705384
merge
assaftibm Jan 25, 2024
1e4d65e
Merge branch 'main' into service
matanor Jan 25, 2024
0349281
Merge remote-tracking branch 'remotes/origin/main' into service
matanor Jan 29, 2024
8de9fae
move RemoteMetric from src/unitxt/test_utils/metrics.py to src/unitxt…
matanor Jan 30, 2024
82b68a4
update the api of the metric service, and the result returned by the …
matanor Jan 30, 2024
a454748
modify RemoteMetric not to inherit from GlobalMetric. to avoid runnin…
matanor Jan 30, 2024
09b6f18
add log prints to metric service
matanor Feb 1, 2024
bfc6411
support artifact_identifier in Artifact objects
matanor Feb 1, 2024
3f8fb9c
support for remote metrics
matanor Feb 1, 2024
3e12cb0
reorganize code
matanor Feb 1, 2024
cd8c025
add docstrings
matanor Feb 1, 2024
d18da61
add tests for reading the remote metrics config vars from the environ…
matanor Feb 1, 2024
784be4f
update update_instance_scores() and set_global_score()
matanor Feb 1, 2024
ed18abc
add missing return statement in wrap_inner_metric_pipeline_metric()
matanor Feb 4, 2024
af1e6b8
set fixed version to metric service requirements
matanor Feb 4, 2024
5fdb92b
assume the dockerfile runs from unitxt/service/metric
matanor Feb 4, 2024
0728dfb
use same dir imports for the service code
matanor Feb 4, 2024
6da9061
add metric service related command to make file
matanor Feb 4, 2024
14bb1eb
update HF env params location
matanor Feb 4, 2024
44c0e9f
add init_logger()
matanor Feb 4, 2024
db8a4ef
support build_number and release_version in metric service image names
matanor Feb 4, 2024
4e646d1
report request handling time in INFO logging level
matanor Feb 4, 2024
bc09936
update metric service commands to accept only one param tag_name
matanor Feb 4, 2024
fade3da
remove --proxy-headers
matanor Feb 4, 2024
761bc2c
restore --proxy-headers, since removing it did not solve the authenti…
matanor Feb 4, 2024
9d1a1b7
add locking around metric computation
matanor Feb 5, 2024
c656817
run main.py from docker
matanor Feb 5, 2024
875c739
downgrade to cuda11.6.1, to support running with older cuda drivers
matanor Feb 5, 2024
d75a103
use nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04
matanor Feb 5, 2024
8a0773e
update ubuntu setup
matanor Feb 5, 2024
e312e36
use 11.8 cuda in image
matanor Feb 6, 2024
623971a
move unitxt imports
matanor Feb 6, 2024
b5c4b1b
update docker using dockerfile that works for another service
matanor Feb 6, 2024
1f33658
remove unitxt from requirements.txt
matanor Feb 6, 2024
dbaca07
restore installation of requirements
matanor Feb 6, 2024
4534e74
use dockerfile from sbert service
matanor Feb 6, 2024
032bd45
restore unitxt to requirements.txt
matanor Feb 6, 2024
f2ea3d1
comment out conda install commands
matanor Feb 6, 2024
3e234f4
fix copying of code
matanor Feb 6, 2024
9af95dc
add installation of cffi to fix "pyo3_runtime.PanicException: Python …
matanor Feb 6, 2024
5644d7a
add conda install of torch 1.12.1
matanor Feb 6, 2024
18db41a
move conda install to start of script
matanor Feb 7, 2024
c88d527
add comment
matanor Feb 7, 2024
2f17333
support GPU usage in compute_batch()
matanor Feb 7, 2024
6d97979
set batch_size to 16 in BertScore
matanor Feb 7, 2024
ffb7931
use latest unitxt in metric service requirements.txt
matanor Feb 7, 2024
7d8e1f7
replace unitxt requirement installation: remove it from the requireme…
matanor Feb 7, 2024
ea3d0b4
explicitly set the device for the Reward metric
matanor Feb 8, 2024
0f6d58e
update prints
matanor Feb 8, 2024
78d0be1
update prints
matanor Feb 8, 2024
170f499
explicitly set device in SentenceBert
matanor Feb 8, 2024
f0f064c
refactor
matanor Feb 8, 2024
d8bdc2b
Merge remote-tracking branch 'remotes/origin/main' into service
matanor Feb 8, 2024
7f42e73
update following changes in service.metrics.client_config
matanor Feb 8, 2024
4985ba7
restore version from main
matanor Feb 8, 2024
ad4b8c8
revert changes to perplexity.py
matanor Feb 8, 2024
69f1821
clean dockerfile
matanor Feb 8, 2024
1a79e6d
clean dockerfile
matanor Feb 8, 2024
47d78ee
add docstrings and comments
matanor Feb 8, 2024
38ed68e
remove use of ApplyMetric
matanor Feb 8, 2024
e428178
add docstrings and explanations
matanor Feb 8, 2024
b1e81eb
remove prints
matanor Feb 8, 2024
df833fd
RemoteMetric must have a main_score
matanor Feb 8, 2024
b9616ab
add start_metrics_http_service()
matanor Feb 8, 2024
6305e2a
add pydantic required for the metric service api
matanor Feb 8, 2024
58104f6
move metric service api from api.py into unitxt: places in metric_uti…
matanor Feb 8, 2024
3a659af
update import of RemoteMetric
matanor Feb 8, 2024
81060d8
restore init of main_score to None, otherwise the main_score is consi…
matanor Feb 8, 2024
ccc2529
add test_remote_service_with_valid_response()
matanor Feb 8, 2024
56bdd4e
move client_config.py functionality into metric_utils.py
matanor Feb 8, 2024
2a3cf13
Merge remote-tracking branch 'remotes/origin/main' into service
matanor Feb 8, 2024
87632b2
Merge remote-tracking branch 'remotes/origin/main' into service
matanor Feb 11, 2024
d57ed6d
remove test_service.py
matanor Feb 11, 2024
6e53912
add doc strings, type hints
matanor Feb 11, 2024
b153a14
add doc strings, small code update
matanor Feb 11, 2024
28523ca
remove type hints that cause an import error
matanor Feb 11, 2024
5af1df1
Merge branch 'main' into service
matanor Feb 11, 2024
1a845d7
Merge remote-tracking branch 'remotes/origin/main' into service
matanor Feb 15, 2024
da473ea
remove pydantic from the service requirements, it is only used in the…
matanor Feb 15, 2024
b91056e
update the BUILD_DIR env parameter
matanor Feb 15, 2024
06d93d0
remove the pydantic dependency
matanor Feb 25, 2024
7501694
move service code into src/unitxt/service.
matanor Feb 25, 2024
da41a1b
use plain dicts for request and response
matanor Feb 25, 2024
7e26d6a
add __init__.py files to new packages
matanor Feb 25, 2024
febca2a
update to run the server module
matanor Feb 25, 2024
b0ed177
remove use of buildx (not needed)
matanor Feb 25, 2024
dba6e9e
Merge remote-tracking branch 'remotes/origin/main' into service
matanor Feb 25, 2024
63c968a
rename metric -> metric_name
matanor Feb 26, 2024
9e5f0c5
restore usage of first_step to disable confidence interval
matanor Feb 26, 2024
c4cf224
rename metric_artifact -> metric
matanor Feb 26, 2024
7dd5f03
disable confidence interval for remote metrics
matanor Feb 26, 2024
a2c950e
add disable_confidence_interval_calculation and set_n_resamples for R…
matanor Feb 26, 2024
3963e41
fix endpoint following rename of 'metric' -> 'metric_name'
matanor Feb 26, 2024
5770822
remove get_env_variable
matanor Feb 26, 2024
933456c
add an option to start the service using a command unitxt-metrics-ser…
matanor Feb 29, 2024
0ccf9c1
add an option to start the service using a command unitxt-metrics-ser…
matanor Feb 29, 2024
a62f638
Merge remote-tracking branch 'remotes/origin/main' into service
matanor Feb 29, 2024
8eaaf52
switch to using the new settings classes
matanor Feb 29, 2024
c3ebf69
add default to remote_metrics setting
matanor Feb 29, 2024
2e4f627
Merge branch 'main' into service
matanor Mar 4, 2024
18 changes: 18 additions & 0 deletions Makefile
@@ -58,3 +58,21 @@ metric:
build:
	format
	pypi

# command: make tag_name=${TAG_NAME} metric-service-build
# example: make tag_name=unitxt-service-metric:b1v0.1 metric-service-build
# Use the unitxt dir as the build context for docker, so the entire codebase
# can be copied into the image. This way the latest code changes are integrated into
# the image, without requiring a formal unitxt release.
metric-service-build:
	cd $(DIR) && docker buildx build --tag $(tag_name) --file $(DIR)/service/metrics/Dockerfile .

# command: make tag_name=${TAG_NAME} metric-service-run-bash
# example: make tag_name=unitxt-service-metric:b1v0.1 metric-service-run-bash
metric-service-run-bash:
	docker run -it $(tag_name) /bin/bash

# command: make tag_name=${TAG_NAME} metric-service-run
# example: make tag_name=unitxt-service-metric:b1v0.1 metric-service-run
metric-service-run:
	docker run -p 8000:8000 --memory=20g $(tag_name)
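The three targets above compose into a typical build-and-run workflow. A sketch, assuming docker is available on the host and reusing the tag name from the examples in the comments:

```shell
# Build the image from the repo root, then start the service on port 8000.
TAG=unitxt-service-metric:b1v0.1
make tag_name=$TAG metric-service-build
make tag_name=$TAG metric-service-run   # service listens on localhost:8000
```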
5 changes: 4 additions & 1 deletion pyproject.toml
@@ -91,4 +91,7 @@ line-ending = "auto"


[tool.ruff.lint.pydocstyle]
convention = "google"

[tool.ruff.flake8-bugbear]
extend-immutable-calls = ["fastapi.Depends", "fastapi.params.Depends", "fastapi.Query", "fastapi.params.Query"]
1 change: 1 addition & 0 deletions requirements/base.rqr
@@ -4,3 +4,4 @@ mecab-python3
absl-py
dpath
ipadic
pydantic==2.6.0
3 changes: 2 additions & 1 deletion requirements/tests.rqr
@@ -3,9 +3,10 @@ transformers
sentence_transformers
ibm-cos-sdk
opendatasets
httpretty~=1.1.4
editdistance
rouge-score
nltk
sacrebleu
scikit-learn
jiwer
87 changes: 87 additions & 0 deletions service/metrics/Dockerfile
@@ -0,0 +1,87 @@
# This dockerfile exemplifies how a unitxt metrics service may be containerized

FROM registry.access.redhat.com/ubi8/ubi:latest

# Disable Red Hat Subscription
RUN sed -i 's/1/0/g' /etc/yum/pluginconf.d/subscription-manager.conf

# Set up conda env vars
ENV CONDA_HOME=/opt/conda \
PATH=/opt/conda/bin:/usr/local/nvidia/bin:$PATH \
CONFIG_DIR=/root/config_dir \
BUILD_DIR=/tmp/unitxt_metric_service \
USE_TF=0 \
USE_TORCH=1

USER root

RUN yum -y update --allowerasing --nobest && yum -y upgrade --allowerasing --nobest && \
yum -y install --allowerasing --nobest \
wget \
bzip2 \
git \
unzip && \
yum clean all

# Install Anaconda
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.2-Linux-x86_64.sh && \
chmod 755 ./Miniconda3-py37_4.8.2-Linux-x86_64.sh && \
/bin/bash ./Miniconda3-py37_4.8.2-Linux-x86_64.sh -b -p ${CONDA_HOME} && \
rm -rf ./Miniconda3-py37_4.8.2-Linux-x86_64.sh && \
echo >>"${CONDA_HOME}/conda-meta/pinned" "conda=4.8" && \
conda config --system --set always_yes True && \
conda config --system --set auto_update_conda False && \
conda config --system --set notify_outdated_conda False && \
conda install python~=3.8.0 && \
conda clean -a

ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib64/:/opt/conda/lib/
# Fix versions for deployment on a specific GPU type
# Change the version numbers to match your hardware
RUN conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch

# Non-root user
ENV NONROOT_USER=gpuuser \
NONROOT_UID=1000 \
NONROOT_GID=1000 \
NONROOT_HOME=/home/gpuuser \
PATH="/home/gpuuser/.local/bin:${PATH}"

RUN groupadd -g ${NONROOT_GID} ${NONROOT_USER} && \
useradd -u ${NONROOT_UID} -g ${NONROOT_GID} -G users -m -c "" -e "" -l -s /bin/bash ${NONROOT_USER} && \
mkdir -p /var/run/sshd && mkdir -p ${NONROOT_HOME}/.ssh && \
chown ${NONROOT_USER} ${NONROOT_HOME}/.ssh && \
chmod -R 700 ${NONROOT_HOME}

# Update pip
RUN pip install --upgrade pip

RUN mkdir -p /usr/local/bin/

USER ${NONROOT_USER}

# Copy unitxt into the image
COPY --chown=${NONROOT_USER}:${NONROOT_GID} . /app/unitxt/.

# Install the unitxt metrics service requirements
RUN cat /app/unitxt/service/metrics/requirements.txt
RUN pip3 install -r /app/unitxt/service/metrics/requirements.txt
RUN pip3 install cffi --upgrade

# install unitxt (editable mode, with all extras)
WORKDIR /app/unitxt
RUN pip install -e ".[all]"

WORKDIR /app/unitxt/service/metrics
ENV HF_HOME=/app/hf/misc
ENV HF_DATASETS_CACHE=/app/hf/datasets
ENV TRANSFORMERS_CACHE=/app/hf/models
EXPOSE 8000

ENV PYTHONPATH=/app/unitxt
ENV PYTHONHASHSEED 0

RUN env
RUN pip3 list
RUN conda list
CMD python3 main.py
158 changes: 158 additions & 0 deletions service/metrics/main.py
@@ -0,0 +1,158 @@
import datetime
import logging
import threading
import time
from logging import Formatter, StreamHandler, getLevelName, getLogger
from typing import cast

import torch
import uvicorn
from fastapi import Depends, FastAPI, Request
from fastapi.exceptions import HTTPException
from starlette.responses import JSONResponse
from tokens import verify_token

from src.unitxt.metric_utils import MetricRequest, MetricResponse

"""
This module defines an HTTP server that wraps unitxt metrics.
It accepts requests specifying which metric to run and the data to run it on.
Requests are handled one by one locally, potentially on a GPU.
"""

# init the FastAPI app object
app = FastAPI(version="0.0.1", title="Unitxt Metrics Service")


def init_logger():
    log = getLogger()
    log.setLevel(getLevelName("INFO"))
    log_formatter = Formatter(
        "%(asctime)s [%(levelname)s] %(filename)s %(lineno)d: %(message)s [%(threadName)s]"
    )

    console_handler = StreamHandler()
    console_handler.setFormatter(log_formatter)
    log.handlers = []
    log.addHandler(console_handler)


init_logger()


# for sanity check
@app.get("/", include_in_schema=False)
def read_root():
    return {"Hello": "Unitxt Metrics"}


# for k8s health checks
@app.get("/health", include_in_schema=False)
def health():
    return "OK"


# A lock to ensure only one request uses the GPU at a time
compute_lock = threading.Lock()


# for computing a metric
@app.post("/compute/{metric}", response_model=MetricResponse)
def compute(metric: str, request: MetricRequest, token: dict = Depends(verify_token)):
    # Imports are done here so the service can start even if unitxt is not installed.
    # This is useful for testing: it enables running health checks and sanity checks without unitxt.
    from unitxt.artifact import Artifact
    from unitxt.operator import MultiStreamOperator
    from unitxt.operators import ArtifactFetcherMixin
    from unitxt.stream import MultiStream

    t0 = time.perf_counter()
    try:
        logging.info(f"Request from [{token['sub']}]")
        logging.info(f"Computing metric '{metric}'.")
        logging.info(
            f"MetricRequest contains {len(request.instance_inputs)} input instances"
        )

        start_time = datetime.datetime.now()
        # Allow only a single computation on the GPU at a time; other requests
        # wait on this lock until the current computation is done.
        with compute_lock:
            logging.info("Acquired compute_lock, starting computation ..")
            start_infer_time = datetime.datetime.now()
            # obtain the metric to compute
            metric_artifact: Artifact = ArtifactFetcherMixin.get_artifact(metric)
            metric_artifact: MultiStreamOperator = cast(
                MultiStreamOperator, metric_artifact
            )

            # prepare the input stream
            multi_stream: MultiStream = MultiStream.from_iterables(
                {"test": request.model_dump()["instance_inputs"]}, copying=True
            )

            # apply the metric and obtain the results
            metric_results = list(metric_artifact(multi_stream)["test"])

            infer_time = datetime.datetime.now() - start_infer_time
            wait_time = start_infer_time - start_time
            logging.info(
                f"Computed {len(metric_results)} metric '{metric}' results, "
                f"took: {infer_time!s}, waited: {wait_time!s}"
            )

        metric_response = {
            "instances_scores": [
                metric_result["score"]["instance"] for metric_result in metric_results
            ],
            "global_score": metric_results[0]["score"]["global"],
        }
        return MetricResponse.model_validate(metric_response)
    finally:
        t1 = time.perf_counter()
        logging.info(f"Request for metric '{metric}' handled in [{t1 - t0:.2f}] secs.")


# wrapper for HTTP exceptions that we throw
@app.exception_handler(HTTPException)
async def unicorn_http_exception_handler(_request: Request, exc: HTTPException):
    logging.exception("HTTP Exception raised")
    return JSONResponse(
        status_code=exc.status_code,
        headers=exc.headers,
        content={"message": exc.detail},
    )


# wrapper for unexpected exceptions
@app.exception_handler(Exception)
async def unicorn_exception_handler(_request: Request, exc: Exception):
    logging.exception(f"Unexpected exception raised: {type(exc).__name__}")
    return JSONResponse(
        status_code=500,
        content={"message": "Internal Server Error"},
    )


def print_gpus_status():
    if torch.cuda.is_available():
        logging.info("Using CUDA")
        logging.info(f"CUDNN VERSION: {torch.backends.cudnn.version()}")
        gpu_id = torch.cuda.current_device()
        logging.info(
            f"There are {torch.cuda.device_count()} GPUs available, using GPU {gpu_id}, name: {torch.cuda.get_device_name(gpu_id)}"
        )
        logging.info(
            f"CUDA Device Total Memory [GB]: {torch.cuda.get_device_properties(0).total_memory / 1e9}"
        )
    else:
        logging.info("There are NO GPUs available.")


def start_metrics_http_service():
    print_gpus_status()
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=False, log_config=None)


if __name__ == "__main__":
    start_metrics_http_service()
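As a usage sketch, a client of the `/compute/{metric}` endpoint above could be structured as follows. The per-instance fields (`prediction`, `references`, `additional_inputs`), the service URL, and the metric name are illustrative assumptions — the authoritative request shape is `MetricRequest` in unitxt's `metric_utils`:

```python
import json
from urllib import request as urlrequest

SERVICE_URL = "http://localhost:8000"  # placeholder; wherever the service runs


def build_payload(predictions, references):
    # One entry per instance; the field names here are assumed for illustration.
    return {
        "instance_inputs": [
            {"prediction": p, "references": r, "additional_inputs": {}}
            for p, r in zip(predictions, references)
        ]
    }


def compute_remote(metric_name, payload, token):
    # POST /compute/{metric_name} with a bearer token, as verify_token expects.
    req = urlrequest.Request(
        f"{SERVICE_URL}/compute/{metric_name}",
        data=json.dumps(payload).encode(),
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {token}",
        },
    )
    with urlrequest.urlopen(req) as resp:
        return json.loads(resp.read())  # a MetricResponse-shaped dict


payload = build_payload(["a cat"], [["a cat", "the cat"]])
```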
3 changes: 3 additions & 0 deletions service/metrics/requirements.txt
@@ -0,0 +1,3 @@
fastapi==0.109.0
uvicorn[standard]==0.27.0.post1
python-jose[cryptography]==3.3.0
83 changes: 83 additions & 0 deletions service/metrics/tokens.py
@@ -0,0 +1,83 @@
import logging
import os
from datetime import datetime, timedelta

from fastapi import Depends, HTTPException
from fastapi.security import OAuth2PasswordBearer
from jose import JWTError, jwt
from starlette import status

# This module handles authorization tokens for the service.
# To generate a master token key, run "openssl rand -hex 32".
# Then, save the value in the environment variable UNITXT_METRICS_MASTER_KEY.
# To create tokens signed with the master key, use create_token(..), as shown in main().

if "UNITXT_METRICS_MASTER_KEY" in os.environ:
    MASTER_KEY = os.environ["UNITXT_METRICS_MASTER_KEY"]
else:
    MASTER_KEY = None

ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_DAYS = 360

log = logging.getLogger("tokens")


class InvalidTokenError(Exception):
    pass


def create_token(name: str):
    assert MASTER_KEY is not None

    # create the token data
    now = datetime.utcnow()
    expires_delta = timedelta(days=ACCESS_TOKEN_EXPIRE_DAYS)
    payload = {
        "iss": "Unitxt Metrics",
        "sub": name,
        "iat": now,
        "exp": now + expires_delta,
    }

    # generate the jwt token and return it
    return jwt.encode(payload, MASTER_KEY, algorithm=ALGORITHM)


def verify_jwt_token(jwt_token):
    try:
        if MASTER_KEY:
            payload = jwt.decode(jwt_token, MASTER_KEY, algorithms=[ALGORITHM])
            if payload["sub"] is None:
                raise InvalidTokenError("Token subject claim is empty")
            return payload
        return {"sub": "Anonymous"}
    except JWTError as e:
        raise InvalidTokenError from e


# This object makes sure that the incoming HTTP request has a header with
# an authorization token (e.g. passed with 'curl -H "Authorization: Bearer {token}"').
# It does NOT check that the token has a valid value (that is done by verify_token(..)).
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")


async def verify_token(token: str = Depends(oauth2_scheme)):
    try:
        return verify_jwt_token(token)
    except InvalidTokenError as e:
        log.exception(e)
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Could not validate credentials",
            headers={"WWW-Authenticate": "Bearer"},
        ) from e


def main():
    name = "unitxt-metrics-service-tester"
    log.info(f"{name}: {create_token(name)}")


if __name__ == "__main__":
    main()
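tokens.py relies on python-jose for HS256 JWTs. To make the mechanics concrete, here is a stdlib-only sketch of what encoding and verifying such a token does under the hood (illustrative only — the service itself uses jose's `jwt.encode`/`jwt.decode`, which also validate registered claims such as `exp`):

```python
import base64
import hashlib
import hmac
import json


def b64url(data: bytes) -> str:
    # base64url without padding, as used in JWTs
    return base64.urlsafe_b64encode(data).rstrip(b"=").decode()


def hs256_encode(payload: dict, key: str) -> str:
    # header and payload are base64url-encoded JSON; the signature is
    # HMAC-SHA256 over "header.payload" using the shared master key.
    header = b64url(json.dumps({"alg": "HS256", "typ": "JWT"}).encode())
    body = b64url(json.dumps(payload).encode())
    sig = b64url(
        hmac.new(key.encode(), f"{header}.{body}".encode(), hashlib.sha256).digest()
    )
    return f"{header}.{body}.{sig}"


def hs256_verify(token: str, key: str) -> dict:
    header, body, sig = token.split(".")
    expected = b64url(
        hmac.new(key.encode(), f"{header}.{body}".encode(), hashlib.sha256).digest()
    )
    if not hmac.compare_digest(sig, expected):
        raise ValueError("bad signature")
    # restore padding before decoding the claims
    return json.loads(base64.urlsafe_b64decode(body + "=" * (-len(body) % 4)))


token = hs256_encode({"iss": "Unitxt Metrics", "sub": "tester"}, "secret")
claims = hs256_verify(token, "secret")
```

Anyone holding the master key can mint valid tokens, which is why the module keeps it only in an environment variable.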