huggingface · OlivierDehaene · Sep 19, 2024 · Sep 19, 2024
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
@@ -127,6 +127,7 @@ jobs:
 
       - name: Extract metadata (tags, labels) for Docker
         id: meta-grpc
+        if: ${{ matrix.grpc }}
         uses: docker/metadata-action@v5
         with:
           images: |
@@ -142,6 +143,7 @@ jobs:
 
       - name: Build and push Docker image
         id: build-and-push-grpc
+        if: ${{ matrix.grpc }}
         uses: docker/build-push-action@v6
         with:
           context: .

diff --git a/.github/workflows/matrix.json b/.github/workflows/matrix.json
@@ -6,6 +6,7 @@
     "sccache": true,
     "cudaComputeCap": 75,
     "extraBuildArgs": "DEFAULT_USE_FLASH_ATTENTION=False",
+    "grpc": true,
     "dockerfile": "Dockerfile-cuda"
   },
   {
@@ -14,6 +15,7 @@
     "runOn": "always",
     "sccache": true,
     "cudaComputeCap": 80,
+    "grpc": true,
     "dockerfile": "Dockerfile-cuda"
   },
   {
@@ -22,6 +24,7 @@
     "runOn": "main",
     "sccache": true,
     "cudaComputeCap": 86,
+    "grpc": true,
     "dockerfile": "Dockerfile-cuda"
   },
   {
@@ -30,6 +33,7 @@
     "runOn": "main",
     "sccache": true,
     "cudaComputeCap": 89,
+    "grpc": true,
     "dockerfile": "Dockerfile-cuda"
   },
   {
@@ -38,20 +42,23 @@
     "runOn": "main",
     "sccache": true,
     "cudaComputeCap": 90,
+    "grpc": true,
     "dockerfile": "Dockerfile-cuda"
   },
   {
     "name": "All",
     "imageNamePrefix": "cuda-",
     "runOn": "main",
     "sccache": false,
+    "grpc": false,
     "dockerfile": "Dockerfile-cuda-all"
   },
   {
     "name": "cpu",
     "imageNamePrefix": "cpu-",
     "runOn": "main",
     "sccache": true,
+    "grpc": true,
     "dockerfile": "Dockerfile"
   }
 ]
diff --git a/router/src/grpc/server.rs b/router/src/grpc/server.rs
@@ -1534,6 +1534,7 @@ impl From<ErrorResponse> for Status {
             ErrorType::Overloaded => Code::ResourceExhausted,
             ErrorType::Validation => Code::InvalidArgument,
             ErrorType::Tokenizer => Code::FailedPrecondition,
+            ErrorType::Empty => Code::InvalidArgument,
         };
 
         Status::new(code, value.error)

diff --git a/router/src/http/server.rs b/router/src/http/server.rs
@@ -89,6 +89,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
 example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
 example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
 (status = 413, description = "Batch size error", body = ErrorResponse,
 example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 )
@@ -285,6 +287,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
 example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
 example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
 (status = 413, description = "Batch size error", body = ErrorResponse,
 example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 )
@@ -306,7 +310,7 @@ async fn rerank(
         tracing::error!("{message}");
         let err = ErrorResponse {
             error: message,
-            error_type: ErrorType::Validation,
+            error_type: ErrorType::Empty,
         };
         let counter = metrics::counter!("te_request_failure", "err" => "validation");
         counter.increment(1);
@@ -471,6 +475,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
 example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
 example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
 (status = 413, description = "Batch size error", body = ErrorResponse,
 example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 )
@@ -489,7 +495,7 @@ async fn similarity(
         tracing::error!("{message}");
         let err = ErrorResponse {
             error: message,
-            error_type: ErrorType::Validation,
+            error_type: ErrorType::Empty,
         };
         let counter = metrics::counter!("te_request_failure", "err" => "validation");
         counter.increment(1);
@@ -553,6 +559,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
 example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
 example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
 (status = 413, description = "Batch size error", body = ErrorResponse,
 example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 )
@@ -615,7 +623,7 @@ async fn embed(
                 tracing::error!("{message}");
                 let err = ErrorResponse {
                     error: message,
-                    error_type: ErrorType::Validation,
+                    error_type: ErrorType::Empty,
                 };
                 let counter = metrics::counter!("te_request_failure", "err" => "validation");
                 counter.increment(1);
@@ -722,6 +730,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
 example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
 example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
 (status = 413, description = "Batch size error", body = ErrorResponse,
 example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 )
@@ -792,7 +802,7 @@ async fn embed_sparse(
                 tracing::error!("{message}");
                 let err = ErrorResponse {
                     error: message,
-                    error_type: ErrorType::Validation,
+                    error_type: ErrorType::Empty,
                 };
                 let counter = metrics::counter!("te_request_failure", "err" => "validation");
                 counter.increment(1);
@@ -900,6 +910,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
 example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
 example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
 (status = 413, description = "Batch size error", body = ErrorResponse,
 example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 )
@@ -961,7 +973,7 @@ async fn embed_all(
                 tracing::error!("{message}");
                 let err = ErrorResponse {
                     error: message,
-                    error_type: ErrorType::Validation,
+                    error_type: ErrorType::Empty,
                 };
                 let counter = metrics::counter!("te_request_failure", "err" => "validation");
                 counter.increment(1);
@@ -1067,6 +1079,8 @@ example = json ! ({"message": "Inference failed", "type": "backend"})),
 example = json ! ({"message": "Model is overloaded", "type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = OpenAICompatErrorResponse,
 example = json ! ({"message": "Tokenization error", "type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = OpenAICompatErrorResponse,
+example = json ! ({"message": "Batch is empty", "type": "empty"})),
 (status = 413, description = "Batch size error", body = OpenAICompatErrorResponse,
 example = json ! ({"message": "Batch size error", "type": "validation"})),
 )
@@ -1150,7 +1164,7 @@ async fn openai_embed(
                 tracing::error!("{message}");
                 let err = ErrorResponse {
                     error: message,
-                    error_type: ErrorType::Validation,
+                    error_type: ErrorType::Empty,
                 };
                 let counter = metrics::counter!("te_request_failure", "err" => "validation");
                 counter.increment(1);
@@ -1265,8 +1279,12 @@ path = "/tokenize",
 request_body = TokenizeRequest,
 responses(
 (status = 200, description = "Tokenized ids", body = TokenizeResponse),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
+(status = 413, description = "Batch size error", body = ErrorResponse,
+example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
-example = json ! ({"message": "Tokenization error", "type": "tokenizer"})),
+example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
 )
 )]
 #[instrument(skip_all)]
@@ -1327,7 +1345,7 @@ async fn tokenize(
                 tracing::error!("{message}");
                 let err = ErrorResponse {
                     error: message,
-                    error_type: ErrorType::Validation,
+                    error_type: ErrorType::Empty,
                 };
                 let counter = metrics::counter!("te_request_failure", "err" => "validation");
                 counter.increment(1);
@@ -1377,8 +1395,12 @@ path = "/decode",
 request_body = DecodeRequest,
 responses(
 (status = 200, description = "Decoded ids", body = DecodeResponse),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
+(status = 413, description = "Batch size error", body = ErrorResponse,
+example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
-example = json ! ({"message": "Tokenization error", "type": "tokenizer"})),
+example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
 )
 )]
 #[instrument(skip_all)]
@@ -1403,7 +1425,7 @@ async fn decode(
                 tracing::error!("{message}");
                 let err = ErrorResponse {
                     error: message,
-                    error_type: ErrorType::Validation,
+                    error_type: ErrorType::Empty,
                 };
                 let counter = metrics::counter!("te_request_failure", "err" => "validation");
                 counter.increment(1);
@@ -1454,6 +1476,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
 example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Tokenization error", body = ErrorResponse,
 example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
+(status = 400, description = "Batch is empty", body = ErrorResponse,
+example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
 (status = 413, description = "Batch size error", body = ErrorResponse,
 example = json ! ({"error": "Batch size error", "error_type": "validation"})),
 )
@@ -1804,6 +1828,7 @@ impl From<&ErrorType> for StatusCode {
             ErrorType::Overloaded => StatusCode::TOO_MANY_REQUESTS,
             ErrorType::Tokenizer => StatusCode::UNPROCESSABLE_ENTITY,
             ErrorType::Validation => StatusCode::PAYLOAD_TOO_LARGE,
+            ErrorType::Empty => StatusCode::BAD_REQUEST,
         }
     }
 }

diff --git a/router/src/lib.rs b/router/src/lib.rs
@@ -516,6 +516,7 @@ pub enum ErrorType {
     Overloaded,
     Validation,
     Tokenizer,
+    Empty,
 }
 
 #[derive(Serialize)]