IBM · elronbandel · Mar 4, 2024 · Jan 15, 2024 · Jan 18, 2024 · Jan 18, 2024
diff --git a/Makefile b/Makefile
@@ -63,3 +63,21 @@ metric:
 build:
 	format
 	pypi
+
+# command: make tag_name=${TAG_NAME} metric-service-build
+# example: make tag_name=unitxt-service-metric:b1v0.1 metric-service-build
+# Use the unitxt dir as the build context for docker, so the entire codebase
+# can be copied into the image. This way the latest code changes are integrated into
+# the image, without requiring a formal unitxt release.
+# This target names a command rather than a file, so it is declared phony.
+# The first recipe line aborts with a clear message when tag_name is missing;
+# otherwise docker would consume '--file' as the tag value.
+.PHONY: metric-service-build
+metric-service-build:
+	$(if $(strip $(tag_name)),,$(error tag_name is required: make tag_name=IMAGE:TAG metric-service-build))
+	cd "$(DIR)" && docker build --tag "$(tag_name)" --file "$(DIR)/src/unitxt/service/metrics/Dockerfile" .
+
+# command: make tag_name=${TAG_NAME} metric-service-run-bash
+# example: make tag_name=unitxt-service-metric:b1v0.1 metric-service-run-bash
+# This target names a command rather than a file, so it is declared phony.
+# The first recipe line aborts with a clear message when tag_name is missing;
+# otherwise docker would treat '/bin/bash' as the image name.
+.PHONY: metric-service-run-bash
+metric-service-run-bash:
+	$(if $(strip $(tag_name)),,$(error tag_name is required: make tag_name=IMAGE:TAG metric-service-run-bash))
+	docker run -it "$(tag_name)" /bin/bash
+
+# command: make tag_name=${TAG_NAME} metric-service-run
+# example: make tag_name=unitxt-service-metric:b1v0.1 metric-service-run
+# This target names a command rather than a file, so it is declared phony.
+# The first recipe line aborts with a clear message when tag_name is missing.
+# The container port 8000 is published on host port 8000 and the container
+# memory is limited to 20g.
+.PHONY: metric-service-run
+metric-service-run:
+	$(if $(strip $(tag_name)),,$(error tag_name is required: make tag_name=IMAGE:TAG metric-service-run))
+	docker run -p 8000:8000 --memory=20g "$(tag_name)"
diff --git a/pyproject.toml b/pyproject.toml
@@ -91,4 +91,7 @@ line-ending = "auto"
 
 
 [tool.ruff.lint.pydocstyle]
-convention = "google"
+convention = "google"
+
+# Linter plugin settings live under the 'lint' namespace, like the pydocstyle
+# table above. The calls listed here are treated as immutable by
+# flake8-bugbear, so using them as argument defaults is not flagged.
+[tool.ruff.lint.flake8-bugbear]
+extend-immutable-calls = ["fastapi.Depends", "fastapi.params.Depends", "fastapi.Query", "fastapi.params.Query"]
diff --git a/requirements/service.rqr b/requirements/service.rqr
@@ -0,0 +1,5 @@
+torch==1.12.1
+fastapi==0.109.0
+uvicorn[standard]==0.27.0.post1
+python-jose[cryptography]==3.3.0
+transformers
diff --git a/requirements/tests.rqr b/requirements/tests.rqr
@@ -3,9 +3,10 @@ transformers
 sentence_transformers
 ibm-cos-sdk
 opendatasets
+httpretty~=1.1.4
 editdistance
 rouge-score
 nltk
 sacrebleu
 scikit-learn
-jiwer
+jiwer
diff --git a/setup.py b/setup.py
@@ -57,6 +57,7 @@
     entry_points={
         "console_scripts": [
             "unitxt-explore=unitxt.ui:launch",
+            "unitxt-metrics-service=unitxt.service.metrics.main:start_metrics_http_service",
         ],
     },
 )
diff --git a/src/unitxt/eval_utils.py b/src/unitxt/eval_utils.py
@@ -3,6 +3,8 @@
 
 import pandas as pd
 
+from .artifact import verbosed_fetch_artifact
+from .metric_utils import get_remote_metrics_endpoint, get_remote_metrics_names
 from .operator import SequentialOperator
 from .stream import MultiStream
 
@@ -22,9 +24,16 @@
     compute_conf_intervals: Optional[bool] = False,
 ):
     global_scores = {}
+    remote_metrics = get_remote_metrics_names()
     for metric_name in metric_names:
         multi_stream = MultiStream.from_iterables({"test": dataset}, copying=True)
-        metrics_operator = SequentialOperator(steps=[metric_name])
+        if metric_name in remote_metrics:
+            metric = verbosed_fetch_artifact(metric_name)
+            metric_step = as_remote_metric(metric)
+        else:
+            # The SequentialOperator below will handle loading the metric from its name
+            metric_step = metric_name
+        metrics_operator = SequentialOperator(steps=[metric_step])
 
         if not compute_conf_intervals:
             first_step = metrics_operator.steps[0]
@@ -59,3 +68,24 @@
         compute_conf_intervals=compute_conf_intervals,
     )
     return pd.DataFrame(results), pd.DataFrame(global_scores)
+
+
+def as_remote_metric(metric):
+    """Wrap a metric with a RemoteMetric.
+
+    Currently supported is wrapping the inner metric within a MetricPipeline.
+    """
+    from .metrics import MetricPipeline, RemoteMetric
+
+    remote_metrics_endpoint = get_remote_metrics_endpoint()
+    if isinstance(metric, MetricPipeline):
+        metric = RemoteMetric.wrap_inner_metric_pipeline_metric(
+            metric_pipeline=metric,
+            remote_metrics_endpoint=remote_metrics_endpoint,
+        )
+    else:
+        raise ValueError(
+            f"Unexpected remote metric type {type(metric)} for the metric named '{metric.artifact_identifier}'. "
+            f"Remotely executed metrics should be MetricPipeline objects."
+        )
+    return metric
diff --git a/src/unitxt/metric_utils.py b/src/unitxt/metric_utils.py
@@ -1,7 +1,9 @@
-from typing import Iterable, List
+import json
+from typing import Any, Dict, Iterable, List, Optional
 
 from datasets import Features, Value
 
+from .dataclass import Dataclass
 from .operator import (
     MultiStreamOperator,
     SequentialOperatorInitilizer,
@@ -17,6 +19,7 @@
 )
 from .register import _reset_env_local_catalogs, register_all_artifacts
 from .schema import UNITXT_DATASET_SCHEMA
+from .settings_utils import get_settings
 from .stream import MultiStream, Stream
 
 
@@ -140,3 +143,92 @@
 
     stream = multi_stream[split_name]
     return list(stream)
+
+
+"""
+The API of a metric service:
+- MetricRequest: A single input request to the metrics service.
+- MetricResponse: A response returned from a metrics service.
+"""
+
+
+class InstanceInput(Dataclass):
+    """A single instance inputted to a metric service.
+
+    Holds one prediction, a list of references and an optional dictionary of
+    additional inputs.
+    """
+
+    # The prediction of this instance; any type is accepted.
+    prediction: Any
+    # The references of this instance, given as a list.
+    references: List[Any]
+    # Optional dictionary of additional inputs; None when not provided.
+    additional_inputs: Optional[Dict] = None
+
+
+class MetricRequest(Dataclass):
+    """A request to a metrics service, includes a list of input instances."""
+
+    # The input instances of the request, one InstanceInput per instance.
+    instance_inputs: List[InstanceInput]
+
+
+class MetricResponse(Dataclass):
+    """A response produced by a metrics service, includes the computed scores.
+
+    Carries both the per-instance scores and the global scores.
+    """
+
+    # A list of instance score dictionaries. Each dictionary contains the
+    # score names and score values for a single instance.
+    instances_scores: List[Dict[str, Any]]
+    # The global scores dictionary, containing global score names and values.
+    # These are scores computed over the entire set of input instances, e.g.
+    # an average over a score computed per instance.
+    global_score: Dict[str, Any]
+
+
+"""
+Functionality for loading the remote metrics configuration from local environment variables.
+"""
+
+# Name of the environment variable listing the metrics to be executed remotely.
+# Its value should be a valid json list of metric names, for example:
+# '["metrics.rag.context_relevance","metrics.rag.bert_k_precision"]'
+UNITXT_REMOTE_METRICS = "UNITXT_REMOTE_METRICS"
+
+# Name of the environment variable holding the remote endpoint on which the
+# remote metrics are available. For example, 'http://127.0.0.1:8000/compute'
+UNITXT_REMOTE_METRICS_ENDPOINT = "UNITXT_REMOTE_METRICS_ENDPOINT"
+
+
+def get_remote_metrics_names() -> List[str]:
+    """Load the remote metrics names from an environment variable.
+
+    Returns:
+        List[str] - names of metrics to be executed remotely.
+    """
+    settings = get_settings()
+    remote_metrics = settings.remote_metrics
+    if remote_metrics:
+        remote_metrics = json.loads(remote_metrics)
+    if not isinstance(remote_metrics, list):
+        raise RuntimeError(
+            f"Unexpected value {remote_metrics} for the '{UNITXT_REMOTE_METRICS}' environment variable. "
+            f"The value is expected to be a list of metric names in json format."
+        )
+    for remote_metric in remote_metrics:
+        if not isinstance(remote_metric, str):
+            raise RuntimeError(
+                f"Unexpected value {remote_metric} within the '{UNITXT_REMOTE_METRICS}' environment variable. "
+                f"The value is expected to be a string but its type is {type(remote_metric)}."
+            )
+    return remote_metrics
+
+
+def get_remote_metrics_endpoint() -> str:
+    """Load the remote metrics endpoint from an environment variable.
+
+    Returns:
+        str - The remote endpoint on which the remote metrics are available.
+    """
+    settings = get_settings()
+    try:
+        remote_metrics_endpoint = settings.remote_metrics_endpoint
+    except AttributeError as e:
+        raise RuntimeError(
+            f"Unexpected None value for '{UNITXT_REMOTE_METRICS_ENDPOINT}'. "
+            f"Running remote metrics requires defining an "
+            f"endpoint in the environment variable '{UNITXT_REMOTE_METRICS_ENDPOINT}'."
+        ) from e
+    return remote_metrics_endpoint