Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: use status code 400 when batch is empty #413

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ jobs:

- name: Extract metadata (tags, labels) for Docker
id: meta-grpc
if: ${{ matrix.grpc }}
uses: docker/metadata-action@v5
with:
images: |
Expand All @@ -142,6 +143,7 @@ jobs:

- name: Build and push Docker image
id: build-and-push-grpc
if: ${{ matrix.grpc }}
uses: docker/build-push-action@v6
with:
context: .
Expand Down
7 changes: 7 additions & 0 deletions .github/workflows/matrix.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"sccache": true,
"cudaComputeCap": 75,
"extraBuildArgs": "DEFAULT_USE_FLASH_ATTENTION=False",
"grpc": true,
"dockerfile": "Dockerfile-cuda"
},
{
Expand All @@ -14,6 +15,7 @@
"runOn": "always",
"sccache": true,
"cudaComputeCap": 80,
"grpc": true,
"dockerfile": "Dockerfile-cuda"
},
{
Expand All @@ -22,6 +24,7 @@
"runOn": "main",
"sccache": true,
"cudaComputeCap": 86,
"grpc": true,
"dockerfile": "Dockerfile-cuda"
},
{
Expand All @@ -30,6 +33,7 @@
"runOn": "main",
"sccache": true,
"cudaComputeCap": 89,
"grpc": true,
"dockerfile": "Dockerfile-cuda"
},
{
Expand All @@ -38,20 +42,23 @@
"runOn": "main",
"sccache": true,
"cudaComputeCap": 90,
"grpc": true,
"dockerfile": "Dockerfile-cuda"
},
{
"name": "All",
"imageNamePrefix": "cuda-",
"runOn": "main",
"sccache": false,
"grpc": false,
"dockerfile": "Dockerfile-cuda-all"
},
{
"name": "cpu",
"imageNamePrefix": "cpu-",
"runOn": "main",
"sccache": true,
"grpc": true,
"dockerfile": "Dockerfile"
}
]
1 change: 1 addition & 0 deletions router/src/grpc/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1534,6 +1534,7 @@ impl From<ErrorResponse> for Status {
ErrorType::Overloaded => Code::ResourceExhausted,
ErrorType::Validation => Code::InvalidArgument,
ErrorType::Tokenizer => Code::FailedPrecondition,
ErrorType::Empty => Code::InvalidArgument,
};

Status::new(code, value.error)
Expand Down
45 changes: 35 additions & 10 deletions router/src/http/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
)
Expand Down Expand Up @@ -285,6 +287,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
)
Expand All @@ -306,7 +310,7 @@ async fn rerank(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -471,6 +475,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
)
Expand All @@ -489,7 +495,7 @@ async fn similarity(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -553,6 +559,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
)
Expand Down Expand Up @@ -615,7 +623,7 @@ async fn embed(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -722,6 +730,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
)
Expand Down Expand Up @@ -792,7 +802,7 @@ async fn embed_sparse(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -900,6 +910,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
)
Expand Down Expand Up @@ -961,7 +973,7 @@ async fn embed_all(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -1067,6 +1079,8 @@ example = json ! ({"message": "Inference failed", "type": "backend"})),
example = json ! ({"message": "Model is overloaded", "type": "overloaded"})),
(status = 422, description = "Tokenization error", body = OpenAICompatErrorResponse,
example = json ! ({"message": "Tokenization error", "type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = OpenAICompatErrorResponse,
example = json ! ({"message": "Batch is empty", "type": "empty"})),
(status = 413, description = "Batch size error", body = OpenAICompatErrorResponse,
example = json ! ({"message": "Batch size error", "type": "validation"})),
)
Expand Down Expand Up @@ -1150,7 +1164,7 @@ async fn openai_embed(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -1265,8 +1279,12 @@ path = "/tokenize",
request_body = TokenizeRequest,
responses(
(status = 200, description = "Tokenized ids", body = TokenizeResponse),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"message": "Tokenization error", "type": "tokenizer"})),
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
)
)]
#[instrument(skip_all)]
Expand Down Expand Up @@ -1327,7 +1345,7 @@ async fn tokenize(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -1377,8 +1395,12 @@ path = "/decode",
request_body = DecodeRequest,
responses(
(status = 200, description = "Decoded ids", body = DecodeResponse),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"message": "Tokenization error", "type": "tokenizer"})),
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
)
)]
#[instrument(skip_all)]
Expand All @@ -1403,7 +1425,7 @@ async fn decode(
tracing::error!("{message}");
let err = ErrorResponse {
error: message,
error_type: ErrorType::Validation,
error_type: ErrorType::Empty,
};
let counter = metrics::counter!("te_request_failure", "err" => "validation");
counter.increment(1);
Expand Down Expand Up @@ -1454,6 +1476,8 @@ example = json ! ({"error": "Inference failed", "error_type": "backend"})),
example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
(status = 422, description = "Tokenization error", body = ErrorResponse,
example = json ! ({"error": "Tokenization error", "error_type": "tokenizer"})),
(status = 400, description = "Batch is empty", body = ErrorResponse,
example = json ! ({"error": "Batch is empty", "error_type": "empty"})),
(status = 413, description = "Batch size error", body = ErrorResponse,
example = json ! ({"error": "Batch size error", "error_type": "validation"})),
)
Expand Down Expand Up @@ -1804,6 +1828,7 @@ impl From<&ErrorType> for StatusCode {
ErrorType::Overloaded => StatusCode::TOO_MANY_REQUESTS,
ErrorType::Tokenizer => StatusCode::UNPROCESSABLE_ENTITY,
ErrorType::Validation => StatusCode::PAYLOAD_TOO_LARGE,
ErrorType::Empty => StatusCode::BAD_REQUEST,
}
}
}
Expand Down
1 change: 1 addition & 0 deletions router/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,7 @@ pub enum ErrorType {
Overloaded,
Validation,
Tokenizer,
Empty,
}

#[derive(Serialize)]
Expand Down
Loading