neuralmagic · dsikka · Oct 11, 2023 · Sep 11, 2023 · Sep 11, 2023 · Sep 12, 2023
diff --git a/src/deepsparse/server/server.py b/src/deepsparse/server/server.py
@@ -20,6 +20,7 @@
 from typing import List
 
 import yaml
+from pydantic import BaseModel
 
 import uvicorn
 from deepsparse.engine import Context
@@ -43,12 +44,21 @@
     log_system_information,
 )
 from fastapi import FastAPI, UploadFile
+from fastapi.exceptions import HTTPException
 from starlette.responses import RedirectResponse
 
 
 _LOGGER = logging.getLogger(__name__)
 
 
+class CheckReady(BaseModel):  # response schema for the health and ready routes
+    status: str = "OK"  # the only value the handlers in this diff ever return
+
+
+class ModelMetaData(BaseModel):  # response schema for the model metadata route
+    model_path: str  # filled from ``pipeline.model_path`` by the metadata handler
+
+
 def start_server(
     config_path: str,
     host: str = "0.0.0.0",
@@ -142,12 +152,14 @@ def _home():
     def _info():
         return server_config
 
-    @app.get("/ping", tags=["general"], response_model=bool)
-    @app.get("/health", tags=["general"], response_model=bool)
-    @app.get("/healthcheck", tags=["general"], response_model=bool)
-    @app.get("/status", tags=["general"], response_model=bool)
-    def _health():
-        return True
+    @app.get("/v2/health/ready", tags=["health"], response_model=CheckReady)
+    @app.get("/v2/health/live", tags=["health"], response_model=CheckReady)
+    def _check_health():
+        return CheckReady()  # status falls back to the model default, "OK"
+
+    @app.get("/v2", tags=["metadata", "server"], response_model=str)
+    def _get_server_info():  # server-level metadata route; returns a fixed greeting
+        return "This is the deepsparse server. Hello!"
 
     @app.post("/endpoints", tags=["endpoints"], response_model=bool)
     def _add_endpoint_endpoint(cfg: EndpointConfig):
@@ -237,16 +249,76 @@ def _add_endpoint(
     pipeline = Pipeline.from_config(pipeline_config, context, server_logger)
 
     _LOGGER.info(f"Adding endpoints for '{endpoint_config.name}'")
-    _add_pipeline_endpoint(
+    _add_inference_endpoints(
         app,
         endpoint_config,
         server_config.system_logging,
         pipeline,
         server_config.integration,
     )
+    _add_status_and_metadata_endpoints(
+        app, endpoint_config, pipeline, server_config.integration
+    )
+
+
+def _add_endpoint_to_app(app, routes_and_fns, response_model, methods, tags):
+    """
+    Register every (route, handler) pair in ``routes_and_fns`` on ``app``.
+
+    ``response_model``, ``methods`` and ``tags`` are applied to each route.
+    """
+    route_options = dict(response_model=response_model, methods=methods, tags=tags)
+    for route, handler in routes_and_fns:
+        app.add_api_route(route, handler, **route_options)
+        _LOGGER.info(f"Added '{route}' endpoint")
+
+
+def clean_up_route(route):
+    """Return ``route`` with a leading "/" prepended when it does not have one."""
+    # Routes that already start with "/" pass through unchanged.
+    return route if route.startswith("/") else "/" + route
+
+
+def _add_status_and_metadata_endpoints(
+    app: FastAPI,
+    endpoint_config: EndpointConfig,
+    pipeline: Pipeline,
+    integration: str = INTEGRATION_LOCAL,
+):
+    """
+    Register GET routes exposing readiness and model metadata for a pipeline.
+
+    :param app: application the routes are added to
+    :param endpoint_config: supplies the route (normalized in place) or name
+    :param pipeline: pipeline whose ``model_path`` the metadata route reports
+    :param integration: INTEGRATION_LOCAL or INTEGRATION_SAGEMAKER
+    :raises ValueError: if ``integration`` is neither of those values
+    """
+
+    def _pipeline_ready():
+        return CheckReady(status="OK")
+
+    def _model_metadata():
+        if not pipeline or not pipeline.model_path:
+            raise HTTPException(status_code=404, detail="Model path not found")
+        return ModelMetaData(model_path=pipeline.model_path)
+
+    if integration == INTEGRATION_LOCAL:
+        if endpoint_config.route:
+            endpoint_config.route = clean_up_route(endpoint_config.route)
+        route_meta = endpoint_config.route or f"/v2/models/{endpoint_config.name}"
+    elif integration == INTEGRATION_SAGEMAKER:
+        route_meta = "/invocations"
+    else:
+        raise ValueError(f"Unsupported integration: {integration!r}")
+
+    meta = [(route_meta, _model_metadata)]
+    ready = [(f"{route_meta}/ready", _pipeline_ready)]  # always <meta>/ready
+    _add_endpoint_to_app(app, meta, ModelMetaData, ["GET"], ["model", "metadata"])
+    _add_endpoint_to_app(app, ready, CheckReady, ["GET"], ["model", "health"])
 
 
-def _add_pipeline_endpoint(
+def _add_inference_endpoints(
     app: FastAPI,
     endpoint_config: EndpointConfig,
     system_logging_config: SystemLoggingConfig,
@@ -275,26 +347,24 @@ def _predict_from_files(request: List[UploadFile]):
 
     routes_and_fns = []
     if integration == INTEGRATION_LOCAL:
-        route = endpoint_config.route or "/predict"
-        if not route.startswith("/"):
-            route = "/" + route
+        route = (
+            f"{endpoint_config.route}/infer"
+            if endpoint_config.route
+            else f"/v2/models/{endpoint_config.name}/infer"
+        )
+        route = clean_up_route(route)
 
         routes_and_fns.append((route, _predict))
         if hasattr(input_schema, "from_files"):
             routes_and_fns.append((route + "/from_files", _predict_from_files))
+
     elif integration == INTEGRATION_SAGEMAKER:
-        route = "/invocations"
+        route = "/invocations/infer"
         if hasattr(input_schema, "from_files"):
             routes_and_fns.append((route, _predict_from_files))
         else:
             routes_and_fns.append((route, _predict))
 
-    for route, endpoint_fn in routes_and_fns:
-        app.add_api_route(
-            route,
-            endpoint_fn,
-            response_model=output_schema,
-            methods=["POST"],
-            tags=["predict"],
-        )
-        _LOGGER.info(f"Added '{route}' endpoint")
+    _add_endpoint_to_app(
+        app, routes_and_fns, output_schema, ["POST"], ["model", "inference"]
+    )
diff --git a/tests/server/test_endpoints.py b/tests/server/test_endpoints.py
@@ -20,7 +20,7 @@
 import pytest
 from deepsparse.loggers import MultiLogger
 from deepsparse.server.config import EndpointConfig, ServerConfig, SystemLoggingConfig
-from deepsparse.server.server import _add_pipeline_endpoint, _build_app
+from deepsparse.server.server import _add_inference_endpoints, _build_app
 from fastapi import FastAPI, UploadFile
 from fastapi.testclient import TestClient
 from tests.utils import mock_engine
@@ -57,11 +57,11 @@ def test_config(self, server_config, client):
         loaded = ServerConfig(**response.json())
         assert loaded == server_config
 
-    @pytest.mark.parametrize("route", ["/ping", "/health", "/healthcheck", "/status"])
+    @pytest.mark.parametrize("route", ["/v2/health/ready", "/v2/health/live"])
     def test_pings_exist(self, client, route):
         response = client.get(route)
         assert response.status_code == 200
-        assert response.json() is True
+        assert response.json()["status"] == "OK"
 
     def test_docs_exist(self, client):
         assert client.get("/docs").status_code == 200
@@ -97,69 +97,75 @@ def test_add_model_endpoint(self, app: FastAPI, client: TestClient):
             output_schema=int,
             logger=MultiLogger([]),
         )
-        _add_pipeline_endpoint(
+        _add_inference_endpoints(
             app,
             system_logging_config=SystemLoggingConfig(),
             endpoint_config=Mock(route="/predict/parse_int"),
             pipeline=mock_pipeline,
         )
-        assert app.routes[-1].path == "/predict/parse_int"
+        assert app.routes[-1].path == "/predict/parse_int/infer"
         assert app.routes[-1].response_model is int
         assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema}
         assert app.routes[-1].methods == {"POST"}
 
         for v in ["1234", "5678"]:
-            response = client.post("/predict/parse_int", json=dict(value=v))
+            response = client.post("/predict/parse_int/infer", json=dict(value=v))
             assert response.status_code == 200
             assert response.json() == int(v)
 
     def test_add_model_endpoint_with_from_files(self, app):
-        _add_pipeline_endpoint(
+        _add_inference_endpoints(
             app,
             system_logging_config=Mock(),
             endpoint_config=Mock(route="/predict/parse_int"),
             pipeline=Mock(input_schema=FromFilesSchema, output_schema=int),
         )
-        assert app.routes[-2].path == "/predict/parse_int"
+        assert app.routes[-2].path == "/predict/parse_int/infer"
         assert app.routes[-2].endpoint.__annotations__ == {"request": FromFilesSchema}
-        assert app.routes[-1].path == "/predict/parse_int/from_files"
+        assert app.routes[-1].path == "/predict/parse_int/infer/from_files"
         assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]}
         assert app.routes[-1].response_model is int
         assert app.routes[-1].methods == {"POST"}
 
     def test_sagemaker_only_adds_one_endpoint(self, app):
         num_routes = len(app.routes)
-        _add_pipeline_endpoint(
+        _add_inference_endpoints(
             app,
             endpoint_config=Mock(route="/predict/parse_int"),
             system_logging_config=Mock(),
             pipeline=Mock(input_schema=FromFilesSchema, output_schema=int),
             integration="sagemaker",
         )
         assert len(app.routes) == num_routes + 1
-        assert app.routes[-1].path == "/invocations"
+        assert app.routes[-1].path == "/invocations/infer"
         assert app.routes[-1].endpoint.__annotations__ == {"request": List[UploadFile]}
 
         num_routes = len(app.routes)
-        _add_pipeline_endpoint(
+        _add_inference_endpoints(
             app,
             endpoint_config=Mock(route="/predict/parse_int"),
             system_logging_config=Mock(),
             pipeline=Mock(input_schema=StrSchema, output_schema=int),
             integration="sagemaker",
         )
         assert len(app.routes) == num_routes + 1
-        assert app.routes[-1].path == "/invocations"
+        assert app.routes[-1].path == "/invocations/infer"
         assert app.routes[-1].endpoint.__annotations__ == {"request": StrSchema}
 
     def test_add_endpoint_with_no_route_specified(self, app):
-        _add_pipeline_endpoint(
+        _add_inference_endpoints(
             app,
-            endpoint_config=Mock(route=None),
+            endpoint_config=EndpointConfig(
+                route=None,
+                name="test_name",
+                task="text-classification",
+                model="default",
+            ),
             system_logging_config=Mock(),
             pipeline=Mock(input_schema=StrSchema, output_schema=int),
         )
-        assert app.routes[-1].path == "/predict"
+
+        assert app.routes[-1].path == "/v2/models/test_name/infer"
 
 
 class TestActualModelEndpoints:
@@ -191,11 +197,11 @@ def client(self):
 
     def test_static_batch_errors_on_wrong_batch_size(self, client):
         # this is okay because we can pad batches now
-        client.post("/predict/static-batch", json={"sequences": "today is great"})
+        client.post("/predict/static-batch/infer", json={"sequences": "today is great"})
 
     def test_static_batch_good_request(self, client):
         response = client.post(
-            "/predict/static-batch",
+            "/predict/static-batch/infer",
             json={"sequences": ["today is great", "today is terrible"]},
         )
         assert response.status_code == 200
@@ -212,7 +218,7 @@ def test_static_batch_good_request(self, client):
         ],
     )
     def test_dynamic_batch_any(self, client, seqs):
-        response = client.post("/predict/dynamic-batch", json={"sequences": seqs})
+        response = client.post("/predict/dynamic-batch/infer", json={"sequences": seqs})
         assert response.status_code == 200
         output = response.json()
         assert len(output["labels"]) == len(seqs)
@@ -242,17 +248,23 @@ def test_dynamic_add_and_remove_endpoint(engine_mock):
     # add /predict
     response = client.post(
         "/endpoints",
-        json=EndpointConfig(task="text-classification", model="default").dict(),
+        json=EndpointConfig(
+            task="text-classification", model="default", name="test_model"
+        ).dict(),
     )
+
     assert response.status_code == 200
-    response = client.post("/predict", json=dict(sequences="asdf"))
+
+    response = client.post("/v2/models/test_model/infer", json=dict(sequences="asdf"))
     assert response.status_code == 200
 
     # remove /predict
     response = client.delete(
         "/endpoints",
         json=EndpointConfig(
-            route="/predict", task="text-classification", model="default"
+            route="/v2/models/test_model/infer",
+            task="text-classification",
+            model="default",
         ).dict(),
     )
     assert response.status_code == 200