diff --git a/_sources/llm_deployment.md.txt b/_sources/llm_deployment.md.txt index b413d02061..282dd558fe 100644 --- a/_sources/llm_deployment.md.txt +++ b/_sources/llm_deployment.md.txt @@ -22,7 +22,7 @@ export token= You can then go ahead and launch a TorchServe instance serving your selected model: ```bash -docker run --rm -ti --gpus all -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:8080 -v data:/data ts/llm --model_id meta-llama/Meta-Llama-3-8B-Instruct --disable_token +docker run --rm -ti --shm-size 1g --gpus all -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:8080 -v data:/data ts/llm --model_id meta-llama/Meta-Llama-3-8B-Instruct --disable_token_auth ``` To change the model you just need to exchange the identifier given to the `--model_id` parameter. @@ -42,7 +42,7 @@ To rename the model endpoint from `predictions/model` to something else you can The launcher script can also be used outside a docker container by calling this after installing TorchServe following the [installation instruction](https://github.com/pytorch/serve/blob/feature/single_cmd_llm_deployment/README.md#-quick-start-with-torchserve). ```bash -python -m ts.llm_launcher --disable_token +python -m ts.llm_launcher --disable_token_auth ``` Please note that the launcher script as well as the docker command will automatically run on all available GPUs so make sure to restrict the visible number of device by setting CUDA_VISIBLE_DEVICES. diff --git a/llm_deployment.html b/llm_deployment.html index ac3c44a5ec..b1b07874c3 100644 --- a/llm_deployment.html +++ b/llm_deployment.html @@ -436,7 +436,7 @@

Quickstart LLM Deployment
docker run --rm -ti --gpus all -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:8080 -v data:/data ts/llm --model_id meta-llama/Meta-Llama-3-8B-Instruct --disable_token
+
docker run --rm -ti --shm-size 1g --gpus all -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:8080 -v data:/data ts/llm --model_id meta-llama/Meta-Llama-3-8B-Instruct --disable_token_auth
 

To change the model you just need to exchange the identifier given to the --model_id parameter. @@ -452,7 +452,7 @@

Quickstart LLM Deployment
To rename the model endpoint from predictions/model to something else you can add --model_name <SOME_NAME> to the docker run command.

The launcher script can also be used outside a docker container by running the following command after installing TorchServe according to the installation instructions.

-
python -m ts.llm_launcher --disable_token
+
python -m ts.llm_launcher --disable_token_auth
 

Please note that the launcher script as well as the docker command will automatically run on all available GPUs, so make sure to restrict the number of visible devices by setting CUDA_VISIBLE_DEVICES.

diff --git a/searchindex.js b/searchindex.js index 7c09867a5b..8b4c5f06b0 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["FAQs", "README", "Security", "Troubleshooting", "api/dev_api", "api/modules", "api/run_circleci_tests", "api/setup", "api/test", "api/torchserve_sanity", "api/ts", "api/ts.metrics", "api/ts.model_service", "api/ts.protocol", "api/ts.torch_handler", "api/ts.torch_handler.request_envelope", "api/ts.torch_handler.unit_tests", "api/ts.torch_handler.unit_tests.models", "api/ts.torch_handler.unit_tests.test_utils", "api/ts.utils", "api/ts_scripts", "apis", "apple_silicon_support", "batch_inference_with_ts", "code_coverage", "configuration", "contents", "custom_service", "default_handlers", "getting_started", "github_actions", "grpc_api", "index", "inference_api", "internals", "large_model_inference", "linux_aarch64", "llm_deployment", "logging", "management_api", "metrics", "metrics_api", "model_api_control", "model_loading", "model_zoo", "nvidia_mps", "performance_checklist", "performance_guide", "request_envelopes", "rest_api", "server", "snapshot", "sphinx/requirements", "token_authorization_api", "torchserve_on_win_native", "torchserve_on_wsl", "use_cases", "workflow_inference_api", "workflow_management_api", "workflows"], "filenames": ["FAQs.md", "README.md", "Security.md", "Troubleshooting.md", "api/dev_api.rst", "api/modules.rst", "api/run_circleci_tests.rst", "api/setup.rst", "api/test.rst", "api/torchserve_sanity.rst", "api/ts.rst", "api/ts.metrics.rst", "api/ts.model_service.rst", "api/ts.protocol.rst", "api/ts.torch_handler.rst", "api/ts.torch_handler.request_envelope.rst", "api/ts.torch_handler.unit_tests.rst", "api/ts.torch_handler.unit_tests.models.rst", "api/ts.torch_handler.unit_tests.test_utils.rst", "api/ts.utils.rst", "api/ts_scripts.rst", "apis.rst", "apple_silicon_support.md", "batch_inference_with_ts.md", "code_coverage.md", "configuration.md", "contents.rst", "custom_service.md", 
"default_handlers.md", "getting_started.md", "github_actions.md", "grpc_api.md", "index.rst", "inference_api.md", "internals.md", "large_model_inference.md", "linux_aarch64.md", "llm_deployment.md", "logging.md", "management_api.md", "metrics.md", "metrics_api.md", "model_api_control.md", "model_loading.md", "model_zoo.md", "nvidia_mps.md", "performance_checklist.md", "performance_guide.md", "request_envelopes.md", "rest_api.md", "server.md", "snapshot.md", "sphinx/requirements.txt", "token_authorization_api.md", "torchserve_on_win_native.md", "torchserve_on_wsl.md", "use_cases.md", "workflow_inference_api.md", "workflow_management_api.md", "workflows.md"], "titles": ["FAQ\u2019S", "\u2757ANNOUNCEMENT: Security Changes\u2757", "Security Policy", "Troubleshooting Guide", "<no title>", "serve", "run_circleci_tests module", "setup module", "test package", "torchserve_sanity module", "ts package", "ts.metrics package", "ts.model_service package", "ts.protocol package", "ts.torch_handler package", "ts.torch_handler.request_envelope package", "ts.torch_handler.unit_tests package", "ts.torch_handler.unit_tests.models package", "ts.torch_handler.unit_tests.test_utils package", "ts.utils package", "ts_scripts package", "<no title>", "Apple Silicon Support", "Batch Inference with TorchServe", "Code Coverage", "Advanced configuration", "<no title>", "Custom Service", "TorchServe default inference handlers", "Getting started", "GitHub Actions for TorchServe", "TorchServe gRPC API", "TorchServe", "Inference API", "TorchServe internals", "Serving large models with Torchserve", "TorchServe on linux aarch64 - Experimental", "LLM Deployment with TorchServe", "Logging in Torchserve", "Management API", "TorchServe Metrics", "Metrics API", "Model API Control", "How to load a model in TorchServe", "Model Zoo", "Running TorchServe with NVIDIA MPS", "Model Inference Optimization Checklist", "Performance Guide", "Request Envelopes", "TorchServe REST API", "Running TorchServe", "TorchServe 
model snapshot", "cf. https://github.com/ryanfox/sphinx-markdown-tables/issues/36", "TorchServe token authorization API", "TorchServe on Windows", "TorchServe on Windows Subsystem for Linux (WSL)", "Torchserve Use Cases", "Workflow Inference API", "Management API", "TorchServe Workflows"], "terms": {"content": [0, 3, 4, 5, 25, 33, 37, 39, 47], "thi": [0, 1, 2, 3, 10, 11, 14, 15, 17, 19, 22, 24, 25, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 51, 53, 56, 58, 59], "document": [0, 1, 2, 22, 29, 31, 33, 34, 35, 37, 38, 39, 42, 45, 56], "relev": [0, 3, 10, 34, 59], "readm": [0, 10, 23], "compliant": [0, 49], "openapi": [0, 33, 39, 49], "3": [0, 22, 23, 25, 29, 30, 31, 33, 35, 36, 37, 38, 39, 41, 42, 44, 45, 49, 52, 53, 59], "0": [0, 2, 10, 11, 14, 15, 18, 22, 23, 25, 27, 28, 29, 31, 33, 35, 37, 38, 39, 40, 41, 42, 44, 47, 49, 52, 53, 54, 56], "your": [0, 1, 2, 3, 23, 25, 27, 28, 29, 32, 35, 37, 38, 39, 43, 45, 46, 47, 48, 50, 54, 56], "case": [0, 3, 11, 16, 25, 26, 27, 29, 31, 32, 33, 35, 40, 45, 46, 47, 50, 59], "you": [0, 1, 2, 3, 23, 25, 27, 28, 29, 33, 35, 36, 37, 38, 39, 40, 41, 43, 46, 47, 48, 49, 50, 51, 54, 56, 58], "abl": [0, 27, 35, 37, 47, 53, 54, 56], "mechan": [0, 25], "standalon": [0, 25, 37], "refer": [0, 1, 3, 23, 27, 29, 31, 34, 35, 40, 41, 42, 44, 45, 47, 50, 54, 56, 59], "cloud": [0, 25, 32, 34, 48], "cloudform": 0, "main": [0, 22, 23, 29, 31, 34, 47, 53, 54], "purpos": [0, 38], "serv": [0, 1, 4, 14, 15, 21, 22, 23, 24, 25, 26, 27, 31, 32, 33, 37, 38, 41, 42, 44, 45, 47, 48, 53, 54, 55, 57, 59], "via": [0, 1, 25, 27, 33, 40, 47, 50], "http": [0, 2, 10, 15, 22, 23, 25, 29, 30, 31, 33, 34, 37, 38, 39, 40, 41, 42, 50, 53, 54, 55, 56, 57, 58], "netti": [0, 22, 25], "engin": [0, 27, 39, 46, 56], "issu": [0, 46], "581": 0, "569": 0, "variou": [0, 14, 32, 43, 47], "provid": [0, 3, 10, 14, 23, 25, 27, 28, 31, 34, 35, 37, 39, 40, 45, 46, 47, 48, 50, 53, 58, 59], "out": [0, 15, 23, 25, 27, 28, 29, 37, 39, 46, 47, 50], "box": [0, 1, 
14, 23, 28, 47], "checkout": [0, 30], "zoo": [0, 1, 26, 31], "list": [0, 1, 10, 11, 12, 14, 15, 19, 21, 22, 25, 26, 27, 28, 31, 33, 34, 35, 37, 40, 44, 48, 59], "all": [0, 2, 10, 11, 12, 14, 15, 17, 18, 19, 22, 23, 24, 25, 27, 28, 29, 32, 33, 34, 35, 37, 38, 39, 46, 47, 50, 54, 56, 58, 59], "also": [0, 3, 11, 14, 15, 25, 27, 28, 29, 31, 34, 35, 36, 37, 38, 40, 47, 50, 53, 54, 56], "check": [0, 1, 2, 3, 21, 22, 26, 28, 29, 31, 35, 39, 40, 46, 53, 56], "exampl": [0, 3, 16, 21, 23, 26, 27, 28, 29, 30, 32, 34, 35, 37, 38, 39, 40, 44, 45, 46, 47, 48, 50, 53, 56, 59], "folder": [0, 11, 25, 29, 34, 39, 50, 58], "No": [0, 25, 28, 43], "As": [0, 2, 27, 28, 29, 38, 45, 46], "now": [0, 1, 22, 23, 25, 29, 39, 42, 50, 53, 56], "onli": [0, 11, 17, 25, 27, 28, 29, 31, 32, 33, 35, 39, 40, 41, 42, 45, 46, 47, 48, 49, 50, 53, 54, 57, 58, 59], "deriv": [0, 15, 27, 28], "howev": [0, 25, 41, 53, 54, 56], "pytorch": [0, 1, 3, 14, 22, 23, 25, 27, 29, 31, 32, 33, 38, 39, 42, 43, 44, 46, 50, 52, 53, 54, 55, 57, 59], "It": [0, 12, 14, 15, 19, 24, 27, 28, 33, 35, 39, 41, 45, 46, 47, 50, 56, 58, 59], "ha": [0, 25, 27, 34, 35, 36, 39, 42, 43, 45, 47, 48, 50, 54, 56, 58], "new": [0, 22, 27, 28, 30, 31, 33, 35, 39, 40, 48, 53, 56], "featur": [0, 22, 23, 33, 35, 42, 51, 53], "snapshot": [0, 1, 22, 26, 50, 54, 59], "version": [0, 3, 4, 5, 19, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 38, 42, 46, 50, 51, 56, 59], "By": [0, 2, 3, 25, 27, 31, 35, 40, 41, 49, 54], "utf": [0, 25, 33, 35], "8": [0, 23, 25, 29, 30, 33, 35, 38, 41, 44, 45, 58], "encod": [0, 2, 10, 11, 33, 34, 47], "string": [0, 1, 10, 12, 14, 25, 28, 40, 59], "If": [0, 2, 3, 23, 25, 27, 29, 33, 34, 35, 37, 38, 39, 40, 46, 47, 50, 51, 54, 56, 58], "convert": [0, 2, 14, 15, 25, 47], "byte": [0, 3, 25, 57, 59], "need": [0, 14, 17, 23, 24, 27, 34, 35, 37, 39, 40, 45, 47, 48, 54, 59], "codec": [0, 13], "specifi": [0, 3, 14, 15, 25, 27, 29, 30, 35, 39, 40, 42, 43, 50, 51, 54, 58], "github": [0, 2, 22, 23, 29, 31, 34, 47, 54, 55, 56], 
"com": [0, 2, 23, 25, 29, 31, 33, 34, 41, 54, 55, 56, 57], "blob": [0, 34, 56], "master": [0, 23, 29, 30, 33, 34, 55, 56, 57], "nmt": [0, 56], "_": [0, 3, 14, 23, 25, 27, 28, 31, 33, 34, 35, 37, 38, 39, 40, 41, 46, 48, 50, 52, 53, 54, 56, 58], "transform": [0, 23, 27, 35, 45, 46, 47], "py": [0, 2, 3, 14, 18, 24, 25, 27, 28, 29, 30, 31, 34, 35, 36, 45, 47, 48, 54, 55, 56], "guid": [0, 2, 26, 29, 32, 37, 55], "heavili": 0, "influenc": [0, 46], "launcher": [0, 37, 47], "core": [0, 46, 47], "pin": [0, 46, 47], "we": [0, 3, 23, 28, 29, 35, 37, 38, 45, 47, 50, 56], "recommend": [0, 2, 28, 31, 33, 35], "cpu_launcher_en": [0, 47], "true": [0, 2, 3, 10, 11, 14, 22, 23, 25, 27, 33, 35, 38, 39, 40, 42, 47, 53, 56], "cpu_launcher_arg": [0, 47], "use_logical_cor": [0, 47], "more": [0, 1, 2, 3, 16, 23, 25, 27, 28, 29, 33, 35, 36, 39, 41, 44, 45, 46, 50, 56, 59], "background": [0, 25], "found": [0, 36, 40, 41, 47], "blog": [0, 36, 46, 47], "post": [0, 3, 12, 14, 23, 25, 27, 33, 34, 35, 37, 38, 39, 42, 46, 47, 54, 56, 57, 58], "configur": [0, 1, 2, 3, 10, 26, 31, 33, 35, 38, 39, 41, 42, 47, 49, 50, 51, 56, 57, 58], "ye": [0, 43], "environ": [0, 2, 3, 24, 30, 35, 39, 40, 42, 45, 47, 51, 53, 54, 55, 56], "variabl": [0, 2, 3, 35, 39, 40, 42, 47, 51, 53, 54], "detail": [0, 2, 3, 25, 27, 28, 31, 33, 35, 36, 39, 40, 41, 44, 45, 47, 54, 56, 58, 59], "requir": [0, 2, 23, 25, 27, 29, 33, 34, 35, 38, 39, 43, 46, 47, 48, 50, 53, 56, 59], "txt": [0, 25, 34, 35, 44, 45, 56], "while": [0, 3, 17, 25, 34, 35, 45, 46, 47, 50, 51, 56], "r": [0, 3, 35], "flag": [0, 3, 23, 25, 40, 51, 53, 56], "extra": [0, 3, 25, 27, 29, 35, 47, 54, 56], "helm": [0, 1], "chart": [0, 1], "node": [0, 24, 25, 35, 54, 56, 57, 59], "ec2": [0, 39], "cluster": [0, 34], "There": [0, 3, 25, 35, 38, 39, 43, 47, 50, 53], "format": [0, 1, 10, 14, 15, 25, 27, 33, 35, 39, 41, 46, 47, 48, 50], "templat": 0, "here": [0, 11, 15, 23, 27, 28, 29, 34, 35, 37, 39, 40, 41, 47, 50, 54, 56], "type": [0, 2, 3, 10, 11, 12, 14, 15, 25, 27, 31, 
33, 35, 37, 39, 41, 42, 43, 44, 45, 46, 50, 53, 59], "behind": [0, 47, 48], "elast": 0, "loadbalanc": 0, "preserv": [0, 51], "runtim": [0, 2, 10, 14, 23, 27, 31, 39, 47, 51, 58], "across": [0, 14, 48, 51], "session": [0, 51], "instanc": [0, 17, 27, 29, 35, 36, 37, 39, 51, 53], "experienc": [0, 51], "either": [0, 2, 3, 25, 27, 34, 35, 37, 39, 40, 47, 51, 53], "plan": [0, 27, 29, 51, 54, 56], "unplan": [0, 51], "servic": [0, 1, 3, 4, 5, 12, 14, 19, 23, 25, 31, 34, 35, 38, 39, 45, 47, 49, 51], "stop": [0, 33, 34, 42, 50, 51], "its": [0, 3, 27, 35, 37, 45, 47, 51], "upon": [0, 51], "restart": [0, 40, 51, 54], "These": [0, 1, 2, 3, 23, 25, 34, 38, 39, 46, 47, 48, 50, 56, 59], "save": [0, 31, 32, 34, 38, 51, 56], "util": [0, 2, 4, 5, 10, 14, 23, 26, 27, 31, 33, 34, 35, 40, 45, 46, 47, 50, 54, 59], "script": [0, 2, 3, 11, 25, 27, 30, 35, 37, 39, 44, 47, 54], "hardwar": [0, 2, 22, 35, 45, 46, 47], "gpu": [0, 1, 10, 11, 12, 22, 23, 24, 28, 29, 30, 32, 34, 35, 37, 39, 40, 45, 46, 47, 55], "compat": [0, 12, 14, 34, 40, 46, 47], "A": [0, 1, 2, 10, 12, 14, 15, 22, 25, 27, 33, 34, 39, 47, 48, 50, 51, 54, 56, 59], "could": [0, 31, 33, 34, 35, 38, 40, 45, 46], "cuda": [0, 24, 25, 27, 29, 30, 35, 37, 45, 47, 55], "well": [0, 25, 37, 38, 45, 46, 48, 56], "build_imag": [0, 23], "sh": [0, 23, 39], "appropri": [0, 45, 53], "option": [0, 10, 27, 29, 33, 35, 37, 39, 40, 42, 43, 46, 50, 56, 58], "help": [0, 35, 41, 45, 46, 47, 50, 54, 56], "To": [0, 3, 22, 23, 25, 27, 28, 29, 31, 33, 35, 37, 38, 39, 40, 41, 44, 45, 47, 48, 49, 50, 57, 58], "command": [0, 10, 22, 23, 29, 30, 31, 33, 35, 37, 39, 41, 42, 45, 53, 56], "b": [0, 1, 10, 40, 48, 50, 53, 56], "branch_nam": 0, "commit_id": 0, "tag": [0, 14], "t": [0, 14, 22, 23, 25, 28, 29, 33, 34, 37, 38, 39, 53, 56, 57], "tagnam": 0, "latest": [0, 23, 30, 46, 56], "The": [0, 2, 3, 10, 14, 15, 19, 23, 24, 25, 27, 29, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49, 50, 51, 53, 54, 56, 57, 58, 59], "instal": [0, 1, 22, 23, 25, 30, 31, 
32, 34, 35, 37, 41, 56], "where": [0, 14, 25, 30, 31, 33, 34, 35, 40, 42, 45, 46, 47, 50, 51, 53], "pypi": [0, 2, 34, 36, 54], "distribut": [0, 27, 30, 35], "look": [0, 23, 35, 37, 39, 46, 47, 48, 50], "accord": [0, 45], "doc": [0, 23, 29, 33, 34, 35, 40, 41, 56, 57], "overrid": [0, 11, 14, 25, 27, 39, 40, 50, 59], "store": [0, 10, 22, 23, 25, 31, 34, 40, 42, 50, 51, 53, 54, 56], "load": [0, 1, 2, 10, 12, 13, 14, 16, 18, 19, 22, 27, 31, 33, 34, 37, 38, 39, 40, 42, 45, 47, 50, 53, 56], "mandatori": [0, 50], "argument": [0, 10, 15, 25, 27, 37, 40], "dure": [0, 10, 25, 27, 29, 35, 39, 40], "start": [0, 1, 2, 10, 11, 22, 23, 25, 30, 31, 32, 33, 34, 35, 37, 38, 39, 41, 42, 43, 45, 46, 47, 49, 50, 51, 53, 54, 56], "defin": [0, 10, 11, 12, 14, 17, 19, 25, 27, 29, 30, 33, 35, 38, 40, 47, 50, 59], "overridden": [0, 17, 25], "line": [0, 10, 39, 40, 53, 54], "manag": [0, 1, 21, 22, 25, 26, 27, 29, 31, 34, 48, 49, 51, 53, 54, 56, 59], "decid": 0, "which": [0, 3, 11, 12, 19, 23, 25, 27, 29, 30, 33, 34, 35, 37, 40, 42, 45, 47, 48, 50, 51, 53, 54, 56, 59], "relationship": 0, "w": [0, 1, 14, 28, 38, 41], "ie": [0, 27, 35], "tool": [0, 1, 2, 27, 29, 31, 32, 34, 35, 45, 46, 56], "postman": [0, 56], "insomnia": 0, "even": [0, 46], "find": [0, 2, 19, 27, 34, 35, 39, 45, 46, 47], "plugin": [0, 2, 22, 34, 56], "sdk": [0, 34], "data": [0, 1, 2, 10, 12, 14, 15, 27, 29, 31, 33, 34, 35, 37, 39, 46, 47, 48, 56, 59], "valu": [0, 2, 3, 10, 11, 19, 25, 39, 40, 45, 47, 50, 54, 58, 59], "pair": [0, 11, 40, 50], "object": [0, 1, 10, 11, 12, 14, 15, 18, 25, 27, 28, 29, 34, 39, 44, 50, 56, 57, 59], "would": [0, 14, 25, 29, 30, 35, 40, 46, 50, 59], "modifi": [0, 11, 35, 47, 50, 53], "postprocess": [0, 12, 14, 27, 35, 38, 39, 58, 59], "extend": [0, 2, 12, 34], "just": [0, 29, 34, 37, 46], "method": [0, 10, 14, 15, 19, 23, 27, 37, 40, 42, 43, 46, 47], "code": [0, 1, 2, 10, 12, 13, 23, 25, 26, 33, 34, 35, 38, 39, 40, 42, 47, 48, 49, 50, 54], "zero": 0, "builtin": 0, "huggingfac": [0, 1, 27, 35, 37, 45], 
"zip": [0, 3, 25, 27, 50, 54], "consist": [0, 23, 24, 40, 45, 51, 59], "artifact": [0, 1, 14, 27, 34, 39, 43], "extens": [0, 24, 46, 50, 56], "cmd": [0, 3, 25, 53], "torch": [0, 3, 14, 22, 27, 35, 36, 39, 41, 46, 47, 48, 52, 54, 55, 56, 59], "step": [0, 3, 22, 27, 28, 29, 32, 39, 46, 54, 56], "given": [0, 3, 10, 19, 37, 39, 40, 41, 54, 56, 59], "current": [0, 10, 24, 25, 29, 31, 36, 37, 38, 39, 40, 48, 50, 51, 53, 58], "allow": [0, 2, 3, 22, 27, 29, 31, 33, 35, 37, 39, 42, 45, 47, 48, 53, 56, 58], "suppli": [0, 25, 27, 51, 58, 59], "one": [0, 17, 25, 28, 30, 31, 33, 35, 39, 40, 45, 48, 50, 53], "number": [0, 1, 3, 14, 22, 23, 25, 27, 28, 29, 31, 33, 34, 35, 37, 39, 40, 45, 47, 54, 56, 58, 59], "model_dir": [0, 10, 12, 16, 18, 27, 35], "locat": [0, 3, 23, 25, 27, 38, 39, 40, 50, 58], "access": [0, 2, 3, 33, 35, 37, 39, 40, 41, 45, 47, 49, 53, 56, 57, 58], "through": [0, 10, 14, 25, 35, 37, 42, 46, 47, 50, 53, 59], "context": [0, 4, 5, 12, 13, 14, 15, 16, 18, 25, 26, 27, 31, 33, 34, 35, 39, 40, 45, 46, 47, 59], "entri": [0, 10, 14, 25, 31, 35, 39, 50, 56], "point": [0, 10, 11, 14, 29, 31, 35, 39, 41, 45, 50], "snippet": [0, 27], "system_properti": [0, 10, 27], "get": [0, 1, 12, 14, 15, 19, 23, 24, 25, 27, 31, 33, 34, 35, 36, 39, 42, 45, 47, 50, 55, 57, 58], "cli": [0, 3, 34, 46], "633": 0, "both": [0, 1, 23, 25, 29, 31, 34, 39, 40, 45, 47, 48, 49, 56], "v2": [0, 15, 39], "signatur": [0, 12, 27, 48, 50], "note": [0, 24, 25, 27, 28, 29, 31, 33, 35, 37, 39, 40, 41, 42, 45, 46, 47, 51, 54, 56], "For": [0, 22, 23, 24, 25, 27, 28, 31, 33, 35, 37, 38, 39, 40, 41, 45, 46, 47, 48, 50, 51, 56], "replac": [0, 15, 19, 31, 33, 35, 39, 47, 53], "charact": [0, 2, 14], "e": [0, 2, 25, 33, 35, 37, 38, 39, 40, 46, 50, 52, 54, 56, 59], "26": 0, "669": 0, "local": [0, 23, 24, 25, 34, 39, 48, 50, 54, 55, 56, 58, 59], "publicli": [0, 2, 3], "uri": [0, 39, 58, 59], "work": [0, 22, 23, 25, 27, 32, 36, 39, 43, 45, 46, 47, 53, 54, 56], "veri": [0, 35], "same": [0, 25, 27, 31, 35, 39, 45, 46, 
48, 56], "made": [0, 14, 38, 39, 40, 50, 56], "public": [0, 56, 59], "consol": 0, "instead": [0, 17], "few": [0, 47, 54, 59], "reason": [0, 25, 47], "overhead": [0, 40, 47], "someth": [0, 37], "dramat": [0, 47], "larger": [0, 33, 46, 47], "launch": [0, 23, 37, 59], "control": [0, 1, 29, 34, 39, 45, 50], "dep": [0, 3], "per": [0, 3, 14, 22, 35, 45, 47, 48, 56], "intend": [0, 1, 2], "develop": [0, 1, 29, 56], "should": [0, 14, 17, 23, 25, 27, 29, 30, 35, 37, 45, 46, 47, 48, 53, 54, 56], "pre": [0, 1, 2, 12, 14, 27, 34, 35, 39, 44, 46, 47, 56], "compress": 0, "decompress": 0, "becaus": [0, 23, 39, 40], "histor": 0, "came": 0, "involv": [0, 2, 45, 46], "unload": [0, 40, 42], "ton": 0, "bucket": [0, 22, 39, 46], "But": [0, 45], "user": [0, 2, 14, 19, 22, 25, 27, 31, 33, 34, 35, 37, 39, 40, 42, 47, 50, 51, 53, 54, 56, 59], "smaller": [0, 35, 45, 47], "choos": [0, 39], "good": [0, 51], "bet": 0, "enforc": [1, 2, 39, 53], "token": [1, 2, 14, 21, 26, 31, 33, 35, 37, 44, 46], "author": [1, 2, 21, 26, 33, 47], "enabl": [1, 2, 3, 22, 23, 27, 35, 36, 39, 40, 41, 45, 47, 49, 53, 56], "model": [1, 2, 4, 10, 11, 12, 13, 14, 15, 16, 18, 19, 21, 22, 26, 28, 31, 32, 33, 34, 36, 38, 41, 45, 47, 48, 52, 53, 54, 55, 57, 58], "api": [1, 10, 12, 22, 27, 34, 35, 43, 50, 51, 54, 56, 59], "disabl": [1, 22, 25, 33, 38, 39, 40, 41, 42, 46, 50], "ar": [1, 2, 15, 22, 23, 24, 25, 27, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 49, 50, 51, 53, 54, 56, 59], "address": [1, 2, 22, 31, 42, 46, 53], "concern": [1, 2, 42, 53], "unauthor": [1, 25, 53], "call": [1, 15, 17, 25, 27, 29, 31, 33, 34, 35, 37, 38, 39, 40, 42, 43, 46, 47, 48, 49, 50, 51, 53, 56, 57, 58], "prevent": [1, 2, 22, 42, 51, 53], "potenti": [1, 2, 34, 46, 53], "malici": [1, 2, 42], "from": [1, 2, 10, 12, 13, 14, 15, 22, 23, 24, 25, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49, 50, 51, 53, 56, 57, 58], "being": [1, 2, 19, 23, 35, 40, 42, 46, 50, 53, 56], "introduc": 1, "server": [1, 10, 
11, 21, 22, 23, 24, 25, 26, 27, 29, 33, 34, 38, 42, 47, 48, 50, 51, 53, 54, 55, 56, 57], "follow": [1, 2, 10, 14, 15, 22, 23, 25, 27, 28, 29, 30, 31, 33, 35, 37, 38, 39, 40, 42, 45, 47, 50, 51, 53, 54, 56, 57, 58, 59], "inform": [1, 2, 10, 14, 23, 25, 27, 29, 34, 35, 38, 39, 41, 46, 47], "perform": [1, 2, 17, 25, 26, 29, 32, 35, 36, 38, 40, 45, 46], "flexibl": [1, 12, 32, 35, 53], "easi": [1, 29, 32, 37, 50], "us": [1, 2, 10, 11, 12, 14, 15, 17, 19, 25, 26, 27, 30, 31, 32, 33, 34, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 58, 59], "eager": [1, 14, 19, 27, 43, 44], "mode": [1, 14, 22, 27, 35, 41, 44, 45, 47], "torchscript": [1, 14, 43, 44, 47, 56], "quick": [1, 32, 37, 47], "usag": [1, 11, 27, 34, 40, 47, 50, 56], "tutori": [1, 35], "archiv": [1, 22, 25, 28, 34, 35, 36, 39, 43, 44, 47, 50, 52, 54, 55, 56, 58, 59], "show": [1, 12, 23, 29, 39, 40, 43, 45, 46, 47, 50], "how": [1, 23, 25, 27, 29, 32, 34, 37, 38, 45, 46, 47, 50, 51], "packag": [1, 2, 4, 5, 23, 26, 27, 29, 34, 35, 44, 47, 54, 56, 59], "file": [1, 2, 10, 14, 19, 22, 23, 24, 27, 28, 29, 30, 31, 33, 38, 39, 40, 41, 42, 43, 47, 48, 51, 53, 54, 56, 58], "procedur": [1, 56], "explain": [1, 27, 32, 33, 35, 38], "rest": [1, 21, 25, 26, 33, 35, 50, 56, 57, 59], "specif": [1, 2, 14, 15, 31, 33, 34, 35, 39, 42, 45, 47, 49, 50, 56, 58], "endpoint": [1, 2, 10, 14, 29, 32, 33, 34, 37, 40, 41, 50], "grpc": [1, 2, 21, 26, 33, 39], "support": [1, 3, 11, 12, 14, 24, 25, 28, 29, 31, 33, 34, 35, 39, 40, 47, 48, 50, 51, 55, 57, 58, 59], "infer": [1, 10, 12, 13, 14, 21, 22, 25, 26, 27, 29, 31, 32, 34, 38, 39, 40, 44, 47, 49, 50, 53, 54, 56], "health": [1, 12, 21, 26, 31, 35], "deploi": [1, 3, 32, 34, 45, 46, 48], "scale": [1, 3, 21, 26, 27, 29, 32, 35, 42, 56, 58, 59], "log": [1, 3, 22, 25, 26, 34, 41, 46, 51, 53], "metric": [1, 4, 5, 10, 21, 22, 26, 32, 34, 38, 39, 49, 54], "prometheu": [1, 21, 26], "grafana": [1, 21, 26], "dashboard": [1, 41, 47], "captum": [1, 14, 15, 33, 44, 52], "explan": [1, 14, 15, 21, 
26, 32, 48], "built": [1, 23, 25, 47, 50], "text": [1, 3, 14, 25, 27, 28, 36, 44], "imag": [1, 2, 10, 14, 22, 23, 25, 27, 28, 29, 33, 35, 37, 44, 47, 50, 56, 57], "batch": [1, 10, 11, 14, 15, 16, 25, 26, 27, 28, 32, 34, 35, 39, 40, 45, 46, 47, 59], "creat": [1, 3, 10, 11, 13, 23, 25, 29, 34, 35, 39, 41, 43, 45, 47, 53, 56], "workflow": [1, 21, 22, 25, 26, 29, 30, 31, 34, 35, 36, 44, 49, 50, 51, 52], "compos": [1, 14], "python": [1, 2, 10, 11, 15, 21, 22, 23, 26, 30, 35, 36, 37, 39, 48, 54, 55, 56, 59], "function": [1, 2, 3, 11, 12, 14, 15, 17, 19, 23, 27, 33, 34, 39, 47, 59], "sequenti": 1, "parallel": [1, 2, 35, 37, 45], "pipelin": [1, 35, 46], "classifi": [1, 27, 28, 40, 44, 56], "take": [1, 10, 14, 17, 25, 27, 29, 34, 35, 39, 45, 47, 50, 53], "an": [1, 2, 3, 11, 12, 14, 16, 19, 22, 23, 25, 29, 31, 33, 34, 35, 36, 39, 40, 43, 45, 46, 47, 50, 53, 54, 56], "return": [1, 10, 11, 12, 13, 14, 15, 16, 19, 29, 31, 33, 35, 39, 40, 41, 45, 50, 57, 58], "name": [1, 3, 10, 11, 14, 15, 19, 23, 25, 27, 28, 29, 30, 33, 34, 35, 37, 38, 39, 40, 41, 47, 48, 50, 54, 56, 58, 59], "input": [1, 2, 12, 14, 15, 17, 27, 28, 33, 35, 39, 44, 45, 46, 47, 48, 59], "classif": [1, 14, 27, 28, 44, 45], "base": [1, 4, 10, 11, 12, 14, 17, 18, 19, 25, 26, 27, 35, 41, 46, 47, 50, 56, 59], "vocabulari": [1, 14], "detector": [1, 27, 28, 56], "detect": [1, 2, 14, 22, 28, 44, 48, 50, 54, 56], "class": [1, 10, 11, 12, 14, 15, 16, 17, 18, 19, 28, 33, 35, 40, 48], "bound": [1, 14, 28], "respect": [1, 14, 28], "segment": [1, 14, 27, 28, 44, 56], "output": [1, 12, 14, 15, 25, 27, 28, 29, 33, 38, 39, 47, 59], "shape": [1, 14, 15, 28], "cl": [1, 28], "h": [1, 14, 28, 33, 39, 50, 53], "height": [1, 14, 28], "width": [1, 14, 28], "llm": [1, 31, 33, 35], "easili": [1, 12, 37, 49], "languag": [1, 37, 48], "sentenc": 1, "can": [1, 2, 16, 19, 23, 25, 27, 28, 29, 31, 33, 34, 35, 36, 37, 39, 40, 41, 42, 45, 46, 47, 49, 50, 51, 53, 54, 56, 58, 59], "sequenc": [1, 31, 33, 35, 44, 45, 46], "q": 1, "answer": [1, 54], 
"multi": [1, 32, 34, 45, 47, 48], "modal": 1, "framework": [1, 12, 22, 23, 34, 35, 47, 48], "build": [1, 23, 34, 37, 47, 54, 56], "combin": [1, 35, 37], "audio": [1, 47], "video": [1, 47], "dual": 1, "translat": [1, 15, 48], "train": [1, 17, 28, 29, 44, 46, 50, 56], "readi": [1, 23, 27, 29, 31, 33, 35, 39, 44, 48, 56], "mani": [1, 34, 45, 47, 48, 50, 51], "intern": [1, 12, 27, 35], "integr": [1, 2, 35, 37, 47], "usecas": [1, 28], "resnet50": 1, "hpu": 1, "compil": [1, 47], "run": [1, 2, 3, 10, 12, 14, 17, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 46, 47, 51, 53, 54, 56], "devic": [1, 14, 22, 25, 27, 28, 35, 37, 45, 56], "describ": [1, 14, 21, 25, 26, 37, 46, 59], "test": [1, 4, 5, 16, 17, 23, 24, 26, 28, 29, 30, 34, 36, 37, 39, 48, 54], "regress": [1, 22, 30, 35, 36], "befor": [1, 2, 23, 25, 27, 28, 33, 39, 45, 46], "ship": 1, "them": [1, 11, 17, 34, 39, 45, 46, 50, 53, 56], "product": [1, 32, 34, 47, 50, 56], "custom": [1, 2, 10, 11, 14, 22, 23, 26, 34, 35, 37, 39, 41, 43, 47], "encrypt": [1, 21, 26], "s3": [1, 25, 39], "side": [1, 21, 26, 33, 39, 49], "km": [1, 39, 40], "serial": [1, 22, 27, 29, 43, 45, 47, 56], "aw": [1, 2, 25, 32, 39, 45], "dynamo": 1, "db": 1, "benchmark": [1, 29, 34], "profil": [1, 54], "jmeter": 1, "apach": 1, "bench": 1, "itself": [1, 45], "kubernet": [1, 33, 34], "demonstr": [1, 27, 40, 56], "deploy": [1, 32, 35, 45, 48, 56], "azur": 1, "googl": [1, 31, 32, 34, 35, 48], "mlflow": 1, "kubeflow": 1, "vertex": [1, 32], "ai": [1, 32, 34, 35, 48], "nvidia": [1, 26, 46, 47, 54, 55, 56], "mp": [1, 26, 47], "optim": [1, 23, 32, 35, 37], "worker": [1, 10, 11, 21, 22, 23, 26, 27, 29, 31, 33, 34, 35, 38, 40, 42, 45, 47, 51, 56, 59], "singl": [1, 11, 12, 35, 37, 45, 50], "11": [2, 22, 30, 42, 45], "white_check_mark": 2, "much": [2, 46, 50], "possibl": [2, 48, 56], "torchserv": [2, 10, 14, 15, 18, 19, 21, 22, 24, 26, 27, 33, 39, 41, 42, 44, 48, 52, 57, 58], "reli": 2, "autom": 2, "scan": 2, "In": [2, 3, 23, 25, 27, 29, 35, 
38, 40, 45, 46, 47, 50, 53, 56], "particular": [2, 29], "depend": [2, 22, 25, 29, 30, 31, 34, 35, 37, 40, 45, 47, 54, 55], "analysi": 2, "dependabot": 2, "docker": [2, 22, 34, 37, 47, 56], "snyk": 2, "codeql": 2, "listen": [2, 10, 29, 31, 33, 34, 39, 41, 49, 57, 58], "port": [2, 23, 31, 33, 34, 39, 41, 49, 50, 54, 57, 58], "8080": [2, 22, 23, 25, 29, 33, 37, 39, 49, 53, 56, 57], "8081": [2, 22, 23, 25, 39, 42, 49, 53, 56, 58], "8082": [2, 22, 23, 25, 41], "7070": [2, 25, 31], "7071": [2, 25, 31], "localhost": [2, 3, 23, 25, 31, 33, 37, 39, 41, 42, 49, 53, 56, 57, 58], "default": [2, 3, 11, 14, 15, 21, 22, 25, 26, 31, 33, 34, 35, 38, 41, 45, 47, 49, 50, 51, 53, 56, 57, 58, 59], "doe": [2, 10, 14, 31, 35, 43, 47, 51], "ani": [2, 3, 19, 27, 29, 31, 33, 34, 35, 37, 38, 39, 40, 56], "includ": [2, 22, 23, 28, 33, 35, 39, 40, 47, 48, 56, 59], "wildcard": 2, "pleas": [2, 23, 28, 31, 33, 35, 37, 39, 42, 44, 45, 47, 56], "awar": [2, 46], "risk": [2, 25], "give": [2, 3, 33, 47], "host": [2, 23, 25, 29, 30, 34, 35, 38, 40, 41, 42, 49, 50, 56], "shown": [2, 27, 35, 40, 45, 46, 47], "abov": [2, 24, 25, 27, 29, 38, 40, 44, 45, 53, 54, 56, 59], "s": [2, 3, 10, 11, 12, 19, 22, 25, 26, 27, 28, 29, 32, 33, 35, 37, 39, 41, 45, 46, 47, 50, 51, 56, 58, 59], "expos": [2, 10, 25, 47], "when": [2, 10, 11, 14, 19, 23, 25, 27, 29, 30, 31, 32, 33, 35, 38, 39, 40, 41, 42, 45, 46, 47, 48, 49, 51, 53], "contain": [2, 14, 25, 27, 28, 35, 37, 39, 43, 48, 50, 56], "map": [2, 10, 11, 14, 19, 28, 35, 59], "ip": [2, 25, 42], "Be": [2, 38], "sure": [2, 16, 23, 27, 28, 29, 35, 37, 39, 45, 46, 48, 54, 56], "valid": [2, 25], "authent": [2, 25], "mar": [2, 22, 23, 25, 27, 29, 33, 34, 35, 39, 40, 42, 43, 44, 50, 56, 58, 59], "download": [2, 3, 29, 35, 39, 41, 44, 54, 58], "internet": [2, 39, 58], "untrustworthi": 2, "sourc": [2, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 25, 29, 35, 36], "mai": [2, 3, 25, 27, 42, 53, 54], "have": [2, 22, 23, 25, 27, 29, 31, 33, 35, 37, 39, 40, 41, 45, 46, 47, 50, 53, 54, 56, 
59], "compromis": 2, "applic": [2, 3, 25, 27, 31, 33, 37, 39, 42, 46, 47, 53, 56], "execut": [2, 16, 19, 22, 24, 25, 29, 30, 35, 41, 45, 47, 54, 56, 59], "arbitrari": [2, 25], "make": [2, 16, 23, 25, 27, 28, 29, 33, 34, 35, 37, 39, 40, 45, 46, 47, 48, 50, 54, 56, 57], "ve": [2, 29, 47], "audit": 2, "re": [2, 33, 47, 48, 50, 54, 59], "safe": 2, "trust": 2, "handler": [2, 3, 10, 14, 15, 16, 25, 26, 31, 33, 34, 35, 37, 39, 40, 43, 47, 48, 56], "clamd": 2, "org": [2, 15, 22, 23, 25, 29, 33, 34, 35, 38, 39, 42, 53, 54], "project": [2, 34, 35], "virustot": 2, "io": [2, 30, 46], "vt": 2, "fickl": 2, "trailofbit": 2, "insid": [2, 15, 45], "untrust": 2, "guarante": [2, 22, 34], "isol": [2, 45], "perspect": 2, "regist": [2, 17, 21, 23, 25, 26, 29, 31, 40, 42, 43, 47, 50, 56, 59], "url": [2, 3, 10, 22, 23, 25, 39, 42, 45, 50, 56, 58, 59], "set": [2, 3, 10, 15, 21, 22, 23, 25, 26, 31, 33, 35, 37, 40, 41, 45, 47, 48, 50, 51, 57, 58], "allowed_url": [2, 25], "paramet": [2, 10, 11, 12, 13, 14, 15, 19, 27, 35, 37, 38, 39, 40, 50, 51, 56, 58, 59], "config": [2, 22, 24, 33, 34, 40, 41, 42, 43, 45, 47, 48, 50, 51, 53, 54, 56], "properti": [2, 3, 10, 27, 34, 40, 41, 42, 47, 48, 50, 53, 54, 56], "restrict": [2, 37, 40, 56], "use_env_allowed_url": [2, 25], "read": [2, 10, 23, 25, 27, 53], "ssl": [2, 56], "two": [2, 3, 23, 25, 27, 38, 39, 43, 45, 49, 56], "wai": [2, 3, 25, 27, 35, 38, 39, 43, 50, 51], "keystor": [2, 25], "privat": [2, 25, 39], "kei": [2, 10, 11, 15, 25, 39, 48, 53, 56], "certif": [2, 25], "prepar": 2, "against": [2, 27, 38, 53], "bad": 2, "prompt": [2, 37, 54], "inject": 2, "some": [2, 3, 10, 27, 28, 29, 30, 35, 36, 37, 38, 39, 45, 46, 47, 54, 56], "g": [2, 23, 24, 25, 33, 35, 37, 39, 46, 54, 56, 59], "fuzz": 2, "sanit": 2, "feed": [2, 17, 46, 59], "rigor": 2, "techniqu": [2, 46], "strict": 2, "rule": 2, "filter": [2, 25], "remov": [2, 3, 14, 19, 53, 54], "fragment": 2, "special": [2, 44], "represent": 2, "verif": 2, "identifi": [2, 27, 37], "attempt": [2, 38, 59], 
"multipl": [2, 14, 25, 32, 33, 35, 40, 43, 45, 47, 56], "share": [2, 45, 56], "memori": [2, 11, 23, 25, 35, 40, 45, 47], "respons": [2, 3, 13, 14, 15, 22, 25, 27, 31, 33, 34, 39, 40, 56, 57], "ensur": [2, 16, 22, 27, 40, 46, 54], "interact": [2, 29, 34], "each": [2, 23, 25, 28, 31, 33, 35, 45, 47, 51, 53, 59], "other": [2, 3, 27, 34, 35, 39, 45, 46, 56, 58, 59], "primari": 2, "area": 2, "tenant": 2, "resourc": [2, 23, 29, 31, 35, 39, 45, 46, 54, 58], "alloc": [2, 25, 29, 45], "attack": 2, "www": [2, 15, 25, 35], "facebook": 2, "whitehat": 2, "amazon": [2, 32, 36, 41], "section": [3, 23, 29, 35, 40, 45, 53, 59], "common": [3, 30, 31, 47], "face": [3, 23], "correspond": [3, 29, 40, 48, 56], "usual": [3, 35, 37, 54], "verifi": [3, 23, 35, 36], "ss": 3, "ntl": 3, "grep": [3, 22], "kill": 3, "differ": [3, 11, 12, 25, 27, 31, 34, 35, 39, 40, 45, 46, 47, 48, 50, 53, 56], "than": [3, 25, 33, 46], "md": [3, 24, 56, 59], "542": 3, "occur": 3, "17": [3, 22, 30, 54, 55], "older": 3, "max": [3, 22, 23, 35, 38, 47, 59], "size": [3, 10, 11, 14, 22, 23, 25, 27, 28, 34, 35, 38, 39, 44, 45, 46, 47, 59], "roughli": 3, "6": [3, 28, 30, 35, 52], "5": [3, 14, 15, 28, 32, 35, 38, 39, 41, 52], "mb": [3, 11, 38, 40, 44], "henc": [3, 29], "greater": 3, "5mb": 3, "cannot": [3, 42], "upload": [3, 30, 42], "updat": [3, 11, 23, 28, 29, 32, 35, 39, 40, 46, 53, 56, 59], "max_request_s": [3, 25], "max_response_s": [3, 25], "cat": [3, 23, 33], "model_stor": [3, 23, 25, 29, 39, 40, 42, 45, 50, 53, 56], "ts": [3, 4, 5, 23, 25, 26, 27, 28, 31, 33, 34, 35, 37, 38, 39, 40, 42, 48, 50, 51, 56], "path": [3, 10, 23, 24, 25, 27, 29, 38, 39, 40, 43, 47, 50, 54, 55, 56, 58], "335": 3, "nc": [3, 22, 29, 40, 42, 50, 53, 56], "383": 3, "512": [3, 35, 46], "last": [3, 31, 33, 35, 51], "restor": [3, 51], "state": [3, 33, 34, 50, 51], "thrown": 3, "inconsist": 3, "compar": [3, 45, 47], "log_loc": [3, 38, 51], "system": [3, 11, 12, 22, 25, 27, 31, 34, 39, 40, 47, 48, 51, 54, 56, 58], "export": [3, 10, 25, 27, 29, 
36, 37, 39, 47, 55], "desir": [3, 27], "extract": 3, "654": 3, "clear": 3, "messag": [3, 10, 12, 13, 19, 33, 38, 42, 50], "try": [3, 14, 39, 46, 47, 50, 53], "conflict": 3, "exist": [3, 11, 25, 28, 31, 33, 34, 35, 40], "500": [3, 19, 33, 40], "wa": [3, 23, 33, 34, 36, 38, 46], "whether": [3, 39], "spawn": [3, 47], "up": [3, 12, 23, 25, 27, 29, 31, 32, 34, 35, 39, 45, 46, 47, 50, 53, 56, 58], "increas": [3, 27, 39, 45, 46, 47, 56], "curl": [3, 21, 22, 23, 26, 29, 30, 37, 39, 41, 42, 53, 54, 56, 58], "x": [3, 23, 25, 30, 33, 37, 39, 42, 56, 58], "model_nam": [3, 10, 11, 12, 15, 16, 18, 27, 33, 35, 37, 39, 40, 41, 50, 56, 59], "like": [3, 11, 14, 15, 25, 29, 32, 33, 34, 35, 37, 39, 40, 46, 47, 50], "egg": [3, 52], "json": [3, 4, 10, 11, 14, 19, 24, 25, 26, 29, 33, 34, 35, 37, 39, 45, 47, 48, 53, 56, 57, 58, 59], "etc": [3, 27, 34, 46, 56], "write": [3, 25, 28, 48, 53], "566": 3, "waveglow": [3, 27], "speech": [3, 27, 36], "synthes": [3, 27], "creation": [3, 39], "mostli": [3, 47], "initi": [3, 12, 14, 18, 22, 25, 27, 35, 39, 40, 42, 43, 47, 56], "due": [3, 45], "erron": 3, "observ": 3, "miss": [3, 14, 27], "modul": [3, 4, 5, 26, 35, 54], "667": 3, "537": 3, "subpackag": [4, 5, 26], "submodul": [4, 5, 26, 31], "dimens": [4, 5, 10, 26, 47], "metric_collector": [4, 5, 10, 25, 26], "metric_encod": [4, 5, 10, 26], "metrics_stor": [4, 5, 10, 26], "process_memory_metr": [4, 5, 10, 26], "system_metr": [4, 5, 10, 26], "unit": [4, 5, 10, 16, 26, 28, 39, 40, 41], "model_servic": [4, 5, 10, 26], "protocol": [4, 5, 10, 15, 25, 26, 31, 33, 35, 39, 58], "otf_message_handl": [4, 5, 10, 26, 31, 33, 35], "torch_handl": [4, 5, 10, 26, 27, 28, 35], "request_envelop": [4, 10, 14, 26], "bodi": [4, 10, 14, 26, 27, 48], "kserv": [4, 10, 14, 21, 26, 39, 48], "kservev2": [4, 10, 14, 26], "unit_test": [4, 10, 14, 24, 26], "base_model": [4, 14, 16, 26], "test_util": [4, 14, 16, 26, 33, 35], "mock_context": [4, 14, 16, 26], "test_base_handl": [4, 10, 14, 26], "test_envelop": [4, 10, 14, 26], 
"test_image_classifi": [4, 10, 14, 26], "test_image_segment": [4, 10, 14, 26], "test_mnist_kf": [4, 10, 14, 26], "test_object_detector": [4, 10, 14, 26], "base_handl": [4, 5, 10, 26, 27, 47], "contract": [4, 5, 10, 26], "densenet_handl": [4, 5, 10, 26], "image_classifi": [4, 5, 10, 22, 23, 26, 27, 29, 31, 33, 53], "image_segment": [4, 5, 10, 26], "object_detector": [4, 5, 10, 26], "text_classifi": [4, 5, 10, 23, 26], "text_handl": [4, 5, 10, 26], "vision_handl": [4, 5, 10, 26], "timeit_decor": [4, 5, 10, 26], "arg_pars": [4, 5, 26], "model_load": [4, 5, 26], "model_serv": [4, 5, 26], "model_service_work": [4, 5, 26], "run_circleci_test": [4, 5, 26], "setup": [4, 5, 22, 25, 26, 27, 30, 34, 35, 39, 41, 56], "regression_test": [4, 5, 26], "torchserve_san": [4, 5, 24, 26], "ts_script": [4, 5, 24, 26, 29, 30, 31, 34, 36, 54, 55], "api_util": [4, 5, 26], "backend_util": [4, 5, 26], "frontend_util": [4, 5, 26], "install_depend": [4, 5, 24, 26, 29, 30, 36, 54], "install_from_src": [4, 5, 26, 54, 55], "marsgen": [4, 5, 26], "modelarchiver_util": [4, 5, 26], "print_env_info": [4, 5, 26], "regression_util": [4, 5, 26], "sanity_util": [4, 5, 26], "shell_util": [4, 5, 26], "torchserve_grpc_cli": [4, 5, 26, 29, 31], "tsutil": [4, 5, 26], "validate_model_on_gpu": [4, 5, 26], "workflow_archiver_util": [4, 5, 26], "pars": [10, 14, 19, 58], "argpars": 10, "parser": [10, 34], "todo": 10, "add": [10, 11, 28, 31, 34, 35, 37, 38, 39, 42, 48, 53, 54, 56], "static": [10, 25, 46], "extract_arg": 10, "arg": [10, 14, 17, 27, 34, 35, 39], "none": [10, 11, 12, 13, 14, 18, 19, 25, 27, 33, 35, 39, 40], "model_service_worker_arg": 10, "backend": [10, 19, 23, 27, 29, 31, 32, 33, 35, 38, 39, 41], "socket": [10, 13, 34, 47], "ts_parser": 10, "incom": [10, 23, 27, 34], "request": [10, 12, 14, 15, 22, 23, 25, 26, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40, 41, 42, 44, 45, 50, 56, 59], "manifest": [10, 12, 27, 34, 39, 47, 50, 58], "batch_siz": [10, 23, 39, 47, 59], "mms_version": 10, 
"limit_max_image_pixel": [10, 25], "model_yaml_config": [10, 25, 35], "fix": [10, 46, 48], "time": [10, 11, 19, 23, 25, 27, 29, 33, 34, 35, 38, 39, 50, 51, 53, 59], "get_all_request_head": 10, "idx": [10, 11, 18, 40], "int": [10, 11, 14, 40, 59], "dict": [10, 12, 14, 27, 34, 39, 40, 59], "str": [10, 11, 14, 19, 27, 33, 35, 40], "get_request_head": [10, 18, 39], "get_request_id": [10, 40], "get_response_content_typ": 10, "get_response_head": 10, "get_response_statu": 10, "tupl": 10, "get_sequence_id": 10, "request_processor": 10, "set_all_response_statu": 10, "200": [10, 31, 33, 35, 38, 39, 40, 42], "phrase": 10, "statu": [10, 23, 31, 33, 35, 39, 40, 42, 58], "individu": 10, "param": [10, 11, 14, 19, 27, 29, 56, 59], "set_response_content_typ": 10, "set_response_head": 10, "set_response_statu": 10, "index": [10, 11, 28, 40, 56], "sent": [10, 12, 25, 27, 31, 33, 35], "handl": [10, 12, 14, 15, 23, 25, 28, 31, 33, 34, 35, 39, 47, 50], "requestprocessor": 10, "request_head": 10, "processor": [10, 25], "add_response_properti": 10, "get_request_properti": 10, "get_response_status_cod": 10, "get_response_status_phras": 10, "report_statu": 10, "reason_phras": 10, "loader": [10, 34, 47], "modelload": 10, "abstract": [10, 12, 15, 27], "gpu_id": [10, 18, 27], "envelop": [10, 15, 26, 34], "bool": [10, 17, 19], "modelloaderfactori": 10, "get_model_load": 10, "tsmodelload": 10, "1": [10, 15, 22, 23, 25, 27, 28, 29, 33, 37, 38, 39, 40, 41, 42, 44, 45, 47, 50, 53, 56, 58, 59], "metrics_cach": 10, "metricscacheyamlimpl": 10, "load_properti": 10, "file_path": 10, "modelservicework": 10, "mm": [10, 12], "front": [10, 11], "end": [10, 11, 40, 43, 46], "commun": [10, 44], "binari": [10, 22, 34], "torchmodelservicework": 10, "s_type": 10, "s_name": 10, "host_addr": 10, "port_num": 10, "metrics_config": [10, 40], "handle_connect": 10, "cl_socket": 10, "connect": [10, 34, 39, 45], "load_model": [10, 23, 25], "load_model_request": 10, "expect": [10, 15, 23, 25, 28, 33, 45, 47, 48, 53, 56], 
"modelpath": 10, "modelnam": [10, 23, 25, 39, 40, 41, 42, 58], "cpu": [10, 11, 22, 24, 27, 29, 30, 32, 40, 44, 46, 47], "els": [10, 14, 27, 37, 39, 40], "wrapper": [10, 12], "unwrapp": 10, "batchsiz": [10, 23, 25, 31, 35, 39, 58], "limitmaximagepixel": 10, "limit": [10, 22, 35, 39, 45, 47, 55, 56, 58], "pillow": [10, 46], "max_image_pixel": 10, "run_serv": 10, "process": [10, 11, 12, 14, 23, 25, 27, 29, 32, 34, 35, 39, 43, 45, 46, 47], "customservic": 10, "definit": [10, 14, 19, 40], "entry_point": 10, "predict": [10, 14, 15, 21, 22, 23, 25, 26, 28, 31, 35, 37, 38, 39, 40, 50, 53, 56, 59], "request_input": 10, "retrieve_data_for_infer": 10, "requestid": 10, "111": [10, 15], "222": 10, "3333": 10, "contenttyp": 10, "val1": 10, "set_cl_socket": 10, "emit_metr": 10, "emit": [10, 19, 40], "dictionari": [10, 11, 14, 15, 19, 27, 39], "metric_nam": [10, 40], "c": [10, 22, 25, 29, 32, 34, 38, 45, 49, 50, 54], "standard": [10, 27, 42, 48, 53], "ping": [10, 12, 23, 31, 33, 35], "descript": [10, 21, 26, 28, 35, 50, 59], "d": [10, 14, 37, 38, 39, 45, 48, 50], "wait": [10, 23, 35, 39, 45, 59], "to_dict": 11, "request_id": [11, 31, 33, 35, 40], "metric_method": 11, "gener": [11, 23, 24, 25, 27, 29, 31, 32, 33, 35, 39, 44, 45, 47, 49, 51, 53], "print": [11, 38, 39], "stdout": [11, 29, 38], "reset": [11, 39], "order": [11, 22, 25, 39, 40, 45, 46, 48, 53, 56], "float": [11, 40], "dump": [11, 38], "metricencod": 11, "skipkei": 11, "fals": [11, 13, 22, 23, 25, 27, 30, 33, 35, 39, 41, 42, 53], "ensure_ascii": 11, "check_circular": 11, "allow_nan": 11, "sort_kei": 11, "indent": 11, "separ": [11, 25, 28, 35, 45], "jsonencod": 11, "obj": 11, "collect": [11, 25, 32, 38, 39, 40, 41, 46, 47, 50], "metricsstor": 11, "deprec": [11, 19, 31, 33, 35], "And": [11, 29, 34, 39, 47], "keep": [11, 34, 39], "add_count": [11, 40], "counter": [11, 41, 46], "increment": [11, 40], "add_error": 11, "error": [11, 14, 25, 38], "add_metr": [11, 40], "add_perc": [11, 40], "percentag": 11, "add_siz": [11, 40], 
"kb": [11, 40, 44], "gb": [11, 40], "add_tim": [11, 39, 40], "ms": [11, 23, 38, 39, 40, 41, 59], "latenc": [11, 31, 33, 40, 45, 46, 47], "accept": [11, 25, 27, 35, 39, 40], "pass": [11, 17, 18, 23, 25, 27, 35, 39, 43, 46, 47, 48, 51, 58, 59], "pid": [11, 23, 34, 39], "gpuid": [11, 23], "check_process_mem_usag": 11, "stdin": 11, "mem_util": 11, "get_cpu_usag": 11, "psutil": [11, 34, 55], "collect_al": 11, "mod": 11, "num_of_gpu": 11, "cpu_util": 11, "disk_avail": 11, "disk_us": 11, "disk_util": 11, "gpu_util": 11, "memory_avail": 11, "memory_us": 11, "memory_util": 11, "element": 11, "modelservic": 12, "wrap": [12, 50], "preprocess": [12, 14, 27, 35, 38, 39, 48, 58, 59], "manner": 12, "backward": [12, 34, 40, 47], "raw": [12, 14, 15, 23, 27, 29, 33, 55, 57], "back": [12, 27, 35], "client": [12, 21, 22, 26, 33, 34, 35, 39, 45, 49], "healthi": [12, 23, 33], "singlenodeservic": 12, "singlenodemodel": 12, "otf": 13, "create_load_model_respons": 13, "create_predict_respons": 13, "ret": 13, "req_id_map": 13, "ts_stream_next": 13, "encode_response_head": 13, "resp_hdr_map": 13, "retrieve_msg": 13, "conn": 13, "retriev": 13, "channel": [13, 25], "send_intermediate_predict_respons": [13, 31, 33, 35], "state_dict": 14, "basehandl": [14, 16, 17, 19, 28, 39, 43, 47], "abc": [14, 15, 35], "describe_handl": [14, 39], "explain_handl": [14, 27, 39], "data_preprocess": [14, 27, 39], "raw_data": [14, 27], "tensor": [14, 27, 35, 37, 46, 59], "unprocess": [14, 27], "target": [14, 23, 27, 41, 46, 56], "get_devic": 14, "self": [14, 25, 27, 30, 35, 39, 40, 43, 47], "outcom": [14, 39, 56], "pertain": [14, 38, 39], "kwarg": [14, 17, 27], "pt": [14, 17, 18, 27, 43, 56], "first": [14, 23, 25, 27, 29, 30, 32, 35, 37, 40, 45, 46, 51], "rais": [14, 27], "runtimeerror": [14, 27], "setup_ort_sess": 14, "model_pt_path": [14, 27, 47], "map_loc": 14, "densenethandl": 14, "match": [14, 27], "list_classes_from_modul": [14, 19], "parent_class": [14, 19], "imageclassifi": [14, 16, 27], "visionhandl": 14, 
"get_max_result_class": 14, "image_process": 14, "resiz": 14, "256": 14, "interpol": 14, "bilinear": 14, "max_siz": 14, "antialia": 14, "centercrop": 14, "224": 14, "totensor": 14, "normal": 14, "mean": [14, 25, 34, 35, 47], "485": 14, "456": 14, "406": 14, "std": 14, "229": 14, "225": 14, "set_max_result_class": 14, "topk": 14, "imagesegment": [14, 16], "n": [14, 22, 24, 25, 28, 31, 33, 35, 38, 39, 54], "k": [14, 23, 56], "objectdetector": [14, 16], "threshold": 14, "NOT": [14, 22, 35, 53], "textclassifi": 14, "texthandl": 14, "get_insight": [14, 27], "text_preprocess": 14, "calcul": [14, 27, 35], "insight": [14, 27, 47], "word": 14, "import": [14, 27, 31, 33, 35, 39, 40, 47], "form": [14, 15, 25, 40, 42, 47], "whose": [14, 27, 44], "ngram": 14, "2": [14, 15, 22, 23, 25, 27, 28, 30, 32, 33, 35, 37, 39, 42, 46, 47, 53, 56, 58, 59], "come": [14, 25, 34, 46, 47, 50], "output_explain": [14, 27], "hit": 14, "basic": [14, 16, 27, 35], "cleanup": 14, "oper": [14, 23, 40, 45, 46, 47], "html": [14, 24], "lowercas": 14, "expand": 14, "i": [14, 24, 25, 31, 32, 33, 35, 40, 43, 46, 54, 56], "don": [14, 34], "do": [14, 22, 24, 25, 27, 28, 35, 37, 38, 43, 47, 53, 56], "accent": 14, "punctuat": 14, "source_vocab": 14, "after": [14, 29, 30, 37, 38, 39, 40, 42, 46, 47, 50, 51, 54], "perfom": 14, "get_source_vocab_path": 14, "ctx": [14, 35], "get_word_token": 14, "input_token": 14, "construct": 14, "necessari": [14, 29, 35], "summarize_attribut": 14, "attribut": [14, 27, 40, 47], "summaris": 14, "vision": [14, 25, 46, 56], "tensor_data": 14, "requestenvelop": 15, "reformat": 15, "orchestr": [15, 48], "seldon": [15, 34, 48], "flat": [15, 48], "item": [15, 39, 46, 54, 58], "vice": [15, 35], "versa": [15, 35], "baseenvelop": 15, "handle_fn": [15, 16], "interfac": [15, 25, 54], "format_output": 15, "parse_input": 15, "grab": 15, "bodyenvelop": 15, "structur": [15, 48, 59], "outlin": 15, "tensorflow": 15, "tfx": 15, "api_rest": 15, "jsonenvelop": 15, "implement": [15, 27, 35, 39, 46, 47, 
59], "captur": [15, 38, 59], "kserveenvelop": 15, "readabl": 15, "kservev2envelop": 15, "fserv": 15, "id": [15, 19, 23, 25, 27, 39, 40, 56], "f0222600": 15, "353f": 15, "47df": 15, "8d9d": 15, "c96d96fa894": 15, "bert": [15, 27, 44, 45, 46], "model_vers": [15, 39, 40, 41], "datatyp": 15, "int64": 15, "37": 15, "66": 15, "108": 15, "109": 15, "base_model_context": 16, "test_batch_handl": 16, "test_inference_with_profiler_works_with_custom_initialize_method": 16, "test_single_handl": 16, "test_binari": 16, "test_bodi": 16, "test_json": 16, "test_json_batch": 16, "test_json_double_batch": 16, "complex": 16, "mux": 16, "sever": [16, 34], "demux": 16, "result": [16, 19, 27, 31, 33, 35, 42, 45, 47, 50, 53, 57], "image_byt": 16, "tmp_path_factori": 16, "test_handl": 16, "test_handle_explain": 16, "simpl": [17, 29, 46], "forward": [17, 27, 46], "argmaxmodel": 17, "comput": [17, 29, 45], "everi": [17, 30, 40, 59], "subclass": 17, "although": 17, "recip": 17, "within": [17, 33, 39, 50], "afterward": 17, "sinc": [17, 40, 46], "former": 17, "care": [17, 34], "hook": 17, "latter": 17, "silent": 17, "ignor": [17, 51], "save_pt_fil": 17, "filepath": 17, "mock": 18, "ad": [18, 22, 32, 34, 35, 37, 40, 45, 47, 53], "without": [18, 35, 39, 45, 50, 51], "mockcontext": 18, "model_pt_fil": 18, "tmp": [18, 56], "model_fil": 18, "mnist": [18, 22, 27, 33, 44, 50, 56], "model_yaml_config_fil": 18, "replic": 18, "exp": 18, "timeit": 19, "decor": [19, 46], "func": 19, "pt2backend": 19, "enum": [19, 34, 40], "enumer": 19, "aot_cudagraph": 19, "aot_eag": 19, "aot_nvfus": 19, "fx2trt": 19, "inductor": 19, "ipex": [19, 37, 47], "nvfuser": 19, "ofi": 19, "onnxrt": 19, "openvino": 19, "torchxla_trace_onc": 19, "except": [19, 23, 45], "predictionexcept": [19, 27], "error_cod": 19, "check_valid_pt2_backend": 19, "klass": 19, "pendingdeprecationwarn": 19, "mark": 19, "warn": [19, 38, 40, 53], "categori": 19, "get_yaml_config": 19, "yaml_file_path": 19, "load_label_map": 19, "mapping_file_path": 19, 
"friendli": [19, 28], "map_class_to_label": 19, "prob": 19, "lbl_class": 19, "probabl": [19, 28, 33], "stream": [21, 26, 33], "unregist": [21, 25, 26, 31, 59], "ci": [22, 30, 34, 36], "job": [22, 25, 30, 32, 34, 40], "auto": [22, 25, 35], "devicetyp": [22, 35], "yaml": [22, 25, 33, 40, 43, 47, 58, 59], "report": [22, 24, 29, 47], "pytest": [22, 52], "maco": [22, 30, 39], "been": [22, 36, 39, 46, 47, 50, 53, 54, 56, 58], "densenet161": [22, 29, 31, 44, 53], "alexnet": [22, 44], "model_store_gen": 22, "10": [22, 23, 25, 38, 39, 40, 54, 55], "16": [22, 25, 38, 45], "heap": 22, "8192": 22, "m": [22, 24, 29, 31, 32, 37, 38], "librari": [22, 35, 47], "bin": [22, 54, 55], "python3": [22, 27], "127": [22, 23, 25, 29, 33, 38, 41, 42, 53, 56], "dir": [22, 27, 35, 56], "thread": [22, 25, 34, 35, 40, 47], "blacklist": 22, "regex": [22, 25], "maximum": [22, 23, 25, 33, 39, 45, 58, 59], "6553500": [22, 25], "pixel": [22, 25], "prefer": 22, "direct": 22, "buffer": 22, "cpp": 22, "024": 22, "04": [22, 30], "08t14": 22, "02": [22, 30, 39, 53], "380": 22, "info": [22, 27, 38, 39, 42], "servingsdk": 22, "impl": 22, "pluginsmanag": 22, "2024": [22, 42, 53], "391": 22, "modelserv": [22, 33, 39], "699": 22, "debug": [22, 38], "wlm": [22, 34, 38], "modelversionedref": 22, "modelmanag": [22, 34], "updatemodel": 22, "count": [22, 40, 41, 42], "kitten": [22, 23, 29, 31, 44, 53, 56], "jpg": [22, 23, 29, 31, 33, 44, 53, 56, 57], "tabbi": [22, 23, 29, 33], "40966302156448364": 22, "tiger_cat": [22, 23, 29], "3467046618461609": 22, "egyptian_cat": [22, 23, 29], "1300288736820221": 22, "lynx": [22, 23, 29], "02391958422958851": 22, "011532187461853027": 22, "myenv": 22, "pip": [22, 24, 29, 31, 35, 36, 54, 55], "torchaudio": 22, "torchdata": 22, "7": [22, 30, 35, 40, 44], "torchtext": [22, 52, 55], "torchvis": [22, 28, 46, 52, 55], "myenv3": 22, "nightli": [22, 30, 34, 47], "0b20240312": 22, "12b20240312": 22, "03": [22, 39, 42], "12t15": 22, "58": 22, "54": 22, "702": 22, "46661922335624695": 
22, "46449029445648193": 22, "0661405548453331": 22, "001292439759708941": 22, "plastic_bag": [22, 29], "00022909720428287983": 22, "aggreg": [23, 34, 39, 40, 45], "send": [23, 25, 27, 29, 31, 33, 35], "ml": [23, 45], "dl": 23, "onc": [23, 31, 33, 35, 40, 41, 42, 47, 56], "design": [23, 25, 34], "nativ": [23, 25, 47], "most": [23, 25, 27, 33, 34, 38, 46, 47, 53], "turn": 23, "reduc": [23, 46, 47], "expens": [23, 46], "jump": 23, "what": [23, 32, 35, 47, 50], "max_batch_delai": [23, 39], "know": [23, 29, 46, 50], "fill": [23, 45], "full": [23, 31, 33, 35, 39, 40], "see": [23, 25, 27, 28, 29, 33, 35, 38, 39, 40, 41, 45, 47, 49, 50, 57, 58], "hug": 23, "4": [23, 25, 35, 39, 41, 44, 52, 53, 58, 59], "interest": [23, 47, 50], "delai": [23, 25, 38, 39, 59], "receiv": [23, 25, 31, 33, 35, 38, 40, 45, 59], "doesn": [23, 25, 39], "timer": 23, "ever": 23, "were": [23, 44, 50], "let": [23, 25, 50], "50": [23, 37, 59], "millisecond": [23, 25, 39, 40, 41, 59], "milli": 23, "second": [23, 25, 30, 39, 45, 47, 56], "defaultvers": [23, 25], "marnam": [23, 25], "minwork": [23, 25, 33, 35, 39, 58], "maxwork": [23, 25, 35, 39, 58], "maxbatchdelai": [23, 25, 35, 39, 58], "responsetimeout": [23, 25, 35], "120": [23, 25, 35, 39], "associ": [23, 40], "relat": [23, 29, 35, 56], "frontend": [23, 25, 29, 31, 33, 35, 54], "tri": [23, 35, 43], "bring": [23, 32], "inferenc": [23, 34], "thing": [23, 47, 50], "inference_address": [23, 25], "management_address": [23, 25], "go": [23, 32, 37, 46, 48], "10m": 23, "mar_fil": [23, 33, 35, 39, 42], "batch_v2": 23, "initial_work": [23, 39, 56], "properli": [23, 39, 53], "modelvers": [23, 39], "modelurl": [23, 39, 42], "loadedatstartup": [23, 39], "9000": [23, 38, 39, 41], "starttim": [23, 39], "2021": 23, "06": 23, "14t23": 23, "18": [23, 33, 38, 42, 45, 50], "21": [23, 41], "793z": 23, "memoryusag": [23, 39], "1726554112": 23, "19946": 23, "gpuusag": [23, 39], "mib": 23, "678": 23, "ljo": 23, "5798614621162415": 23, "38344162702560425": 23, 
"0342114195227623": 23, "0005819813231937587": 23, "quilt": 23, "000273319921689108": 23, "about": [23, 25, 27, 29, 46, 47, 50], "5000": [23, 58, 59], "Then": [23, 29, 35, 38], "14t22": 23, "44": [23, 39], "36": 23, "742z": 23, "19116": 23, "similar": [23, 35, 39], "previou": [23, 38, 39, 45, 58], "entrypoint": 23, "referenc": [23, 40], "metrics_address": [23, 25], "number_of_netty_thread": [23, 25], "32": [23, 42, 45], "job_queue_s": [23, 25], "1000": [23, 39], "home": [23, 24, 25, 29, 45, 54, 55, 56], "100": [23, 25, 35, 38, 39, 58], "cv": 23, "cu102": [23, 24, 29], "rm": [23, 37, 56], "p": [23, 24, 37, 56], "v": [23, 37, 39, 50, 56], "ubuntu": [23, 30, 39, 42, 45], "alreadi": [24, 29, 40, 46, 50, 53, 59], "dev": [24, 30, 34, 54], "cu121": [24, 29], "cu118": [24, 29], "cu117": [24, 29, 30], "cu116": [24, 29, 30], "cu113": [24, 29], "cu111": [24, 29], "cu101": [24, 29], "cu92": [24, 29], "gradlew": [24, 34], "clean": [24, 25, 47], "checkstyl": 24, "findbug": 24, "pmd": 24, "ut": 24, "cov": 24, "htmlcov": 24, "pylint": 24, "rn": 24, "rcfile": 24, "pylintrc": 24, "cd": [24, 31, 54, 55], "htmlcov_ut": 24, "model_archiv": 24, "htmlcov_it": 24, "integ_test": 24, "excut": 24, "npm": [24, 54], "linux": [24, 26, 30], "sudo": [24, 41, 45, 55], "apt": [24, 55], "y": 24, "nodej": [24, 54], "mac": 24, "brew": 24, "broken": 24, "directori": [24, 25, 27, 29, 50, 51, 53, 56], "recurs": [24, 31], "link_check_config": 24, "done": [24, 27, 47], "suffici": 25, "want": [25, 27, 29, 37, 38, 39, 43, 45, 50, 53, 56], "topic": [25, 32, 50], "avail": [25, 28, 29, 33, 35, 37, 39, 40, 47, 50, 51, 54, 56], "three": [25, 35, 40, 53], "prioriti": [25, 42, 53], "thei": [25, 35, 40, 50], "chang": [25, 27, 29, 33, 37, 39, 41, 51, 53, 54, 57, 58, 59], "behavior": [25, 27, 34, 35, 40, 42, 50, 53], "java": [25, 30, 49, 54], "pythonpath": [25, 39], "higher": [25, 45, 47], "ts_config_fil": 25, "log4j2": [25, 38, 40, 50], "xml": [25, 38, 40, 50], "foreground": 25, "footprint": [25, 45], "vmarg": [25, 
38], "adjust": [25, 31, 39], "fit": [25, 34, 35], "present": [25, 27, 39, 40, 50, 54, 58], "model1": [25, 59], "model2": [25, 59], "disk": [25, 40, 46], "pathnam": 25, "avoid": [25, 29, 47], "bind": 25, "8443": [25, 56], "network": [25, 27, 46], "172": [25, 42], "grpc_inference_address": 25, "grpc_management_address": 25, "grpc_inference_port": 25, "grpc_management_port": 25, "grpc_inference_max_connection_age_m": 25, "infinit": [25, 39], "grpc_management_max_connection_age_m": 25, "grace": 25, "grpc_inference_max_connection_age_grace_m": 25, "grpc_management_max_connection_age_grace_m": 25, "443": 25, "whatev": 25, "traffic": 25, "must": [25, 27, 29, 33, 39, 42, 47, 53, 58], "password": 25, "pkcs12": 25, "pkcs8": 25, "openssl": 25, "x509": 25, "chain": 25, "keytool": 25, "storepass": 25, "own": [25, 27, 39, 45, 47], "genkei": 25, "keyalg": 25, "rsa": 25, "alia": [25, 54], "p12": 25, "changeit": 25, "storetyp": 25, "3600": 25, "keysiz": 25, "2048": 25, "dname": 25, "cn": 25, "my_t": 25, "ou": 25, "o": [25, 29, 33, 46, 55, 57], "l": 25, "palo": 25, "alto": 25, "st": 25, "california": 25, "8444": [25, 56], "8445": [25, 56], "keystore_pass": 25, "keystore_typ": 25, "sign": 25, "cert": 25, "req": 25, "dai": [25, 30], "365": [25, 41], "newkei": 25, "keyout": 25, "mykei": 25, "mycert": 25, "pem": 25, "private_key_fil": 25, "certificate_fil": 25, "addit": [25, 35, 39, 45, 46, 47, 48, 50], "header": [25, 33, 35, 37], "tell": [25, 33, 38, 47, 50], "browser": [25, 41], "web": [25, 49, 50], "domain": 25, "permiss": 25, "select": [25, 27, 35, 37, 45], "cors_allowed_origin": 25, "yourdomain": 25, "preflight": 25, "cors_allowed_method": 25, "put": [25, 27, 34, 35, 39, 56], "cors_allowed_head": 25, "xx": 25, "maxdirectmemorys": 25, "affect": [25, 46], "prefer_direct_buff": 25, "part": [25, 27, 32, 34, 39, 58], "seamless": [25, 27], "install_py_dep_per_model": [25, 56], "tar": 25, "gz": [25, 38], "might": [25, 29, 38, 39, 46, 50, 53], "sensit": 25, "credenti": [25, 39], "secur": 
[25, 26, 32, 42, 53], "blacklist_env_var": 25, "regular": [25, 47], "express": 25, "number_of_gpu": [25, 47, 56], "pci": 25, "bu": 25, "enable_metrics_api": [25, 41], "parametername1": 25, "parametervalue1": 25, "parametername2": 25, "parametervalue2": 25, "parameternamen": 25, "parametervaluen": 25, "minimum": [25, 39, 59], "msec": 25, "timeout": [25, 34, 35, 39, 59], "sec": 25, "over": [25, 31, 33, 35, 45, 46, 56, 59], "default_response_timeout": 25, "noop": [25, 39], "vgg16": [25, 44, 50], "embed": [25, 27], "distinct": 25, "determin": [25, 34, 35, 38, 45, 47, 56], "final": [25, 45], "lowest": 25, "highest": [25, 33, 46], "fulli": [25, 45, 47], "pippi": 25, "rpc": [25, 31, 35], "deviceid": [25, 35, 40], "round": [25, 27, 35, 39], "robin": [25, 27, 35], "strategi": [25, 30], "assign": [25, 34, 35, 39], "otherwis": [25, 28, 35], "tune": [25, 32, 47], "impact": [25, 40, 46, 47, 53], "scalabl": 25, "throughput": [25, 29, 35, 38, 45, 46, 47], "enable_envvars_config": [25, 42, 53], "child": 25, "eventloopgroup": 25, "group": 25, "eventloop": [25, 34], "event": 25, "logic": [25, 35, 47, 50], "netty_client_thread": 25, "workerthread": [25, 34], "default_workers_per_model": 25, "queue": [25, 35, 40, 45], "async_log": [25, 38], "asynchron": [25, 39], "deem": [25, 39], "unrespons": [25, 39], "reboot": [25, 39], "unregister_model_timeout": 25, "decode_input_request": 25, "decod": [25, 33, 35, 39, 46], "known": [25, 27, 40, 46, 51, 55], "bytearrai": 25, "convers": [25, 47], "initial_worker_port": 25, "model_server_hom": 25, "pil": 25, "larg": [25, 26, 32, 36, 37, 43, 46, 47], "payload": 25, "comma": 25, "amazonaw": 25, "workflow_stor": [25, 50, 58], "disable_system_metr": 25, "system_metrics_cmd": 25, "empti": [25, 31, 35], "collector": 25, "ts_": 25, "property_nam": 25, "ts_inference_address": 25, "troubleshoot": [26, 32], "coverag": 26, "advanc": [26, 35], "window": [26, 33, 51], "subsystem": 26, "wsl": 26, "polici": [26, 32, 38], "faq": [26, 32], "invok": [27, 50, 56], 
"Is": [27, 43], "ll": [27, 29, 47], "act": 27, "def": [27, 31, 33, 35, 39, 40, 59], "entry_point_function_nam": 27, "sampl": [27, 29, 37, 44], "jit": [27, 34, 47], "similarli": 27, "global": [27, 41, 53, 59], "is_avail": 27, "serialized_fil": 27, "serializedfil": 27, "os": [27, 30, 39], "join": [27, 33, 35], "isfil": 27, "engag": 27, "ask": [27, 32], "startup": [27, 34, 42, 51], "down": [27, 51, 53], "typic": [27, 40], "modelhandl": [27, 35], "__init__": [27, 35], "_context": 27, "prediciton": 27, "pred_out": 27, "unexpect": 27, "513": 27, "nonetheless": 27, "below": [27, 28, 29, 35, 41, 43, 45], "init": [27, 35], "pattern": [27, 38], "maintain": [27, 39], "model_handl": 27, "preprocessed_data": 27, "model_input": 27, "ndarrai": 27, "model_output": 27, "inference_output": 27, "postprocess_output": 27, "achiev": [27, 33, 35, 46], "place": [27, 56], "written": [27, 48], "hi": 27, "algorithm": [27, 34, 35], "lig": 27, "layerintegratedgradi": 27, "captum_sequence_forward": 27, "_is_explain": [27, 39], "so": [27, 29, 35, 37, 40, 54], "neccessari": 27, "logger": [27, 39], "row": 27, "isinst": 27, "statement": [27, 53], "default_handler_nam": 27, "defaulthandlerclass": 27, "customimageclassifi": 27, "procsess": 27, "goe": 27, "digit": [27, 44, 50], "model_version_numb": 27, "path_to_model_architecture_fil": 27, "path_to_state_dict_fil": 27, "comma_seperarted_additional_fil": 27, "skip": 27, "waveglow_synthes": 27, "waveglow_model": 27, "nvidia_waveglowpyt_fp32_20190306": 27, "pth": [27, 29, 56], "waveglow_handl": 27, "tacotron": 27, "nvidia_tacotron2pyt_fp32_20190306": 27, "vcpu": [27, 29], "fashion": [27, 45], "consum": 28, "imagenet": [28, 44], "dataset": [28, 44], "rgb": 28, "top": [28, 48], "ag": 28, "comprehens": 28, "page": [28, 35, 39, 44, 47, 50, 58], "automat": [28, 29, 31, 35, 37, 48, 56], "numer": 28, "simpli": [28, 35, 48], "welcom": 28, "isn": 28, "cover": [28, 38, 47, 50], "model_packag": 28, "alwai": [28, 46, 48], "saniti": 28, "submit": [28, 34, 44], 
"conda": [29, 34], "12": [29, 53], "9": [29, 44], "complet": [29, 31, 33, 35, 38, 39, 46, 56], "clone": [29, 31, 54, 55], "repositori": 29, "git": [29, 31, 52, 54, 55], "parent": 29, "root": 29, "my_path": 29, "mkdir": [29, 31, 56], "wget": [29, 55], "8d451a50": 29, "repo": [29, 31], "densenet_161": 29, "index_to_nam": [29, 56], "equal": [29, 33, 35, 40], "power": [29, 32], "lot": [29, 50], "autosc": 29, "consider": 29, "minim": [29, 35, 41], "move": [29, 35, 56], "later": 29, "finer": 29, "grain": 29, "u": [29, 31], "grpcio": [29, 31], "protobuf": [29, 31, 35], "proto": [29, 31], "grpc_tool": [29, 31], "protoc": [29, 31], "proto_path": [29, 31], "src": [29, 31, 34, 38, 54], "python_out": [29, 31], "grpc_python_out": [29, 31], "cute": 29, "githubusercont": [29, 33, 55, 57], "kitten_smal": [29, 33, 44, 57], "46933549642562866": 29, "4633878469467163": 29, "06456148624420166": 29, "0012828214094042778": 29, "00023323034110944718": 29, "seen": 29, "deep": [29, 35, 47, 50], "learn": [29, 32, 35, 47, 50], "registr": [29, 34, 39, 42, 56, 58, 59], "record": 29, "high": [29, 31, 32, 33, 35, 38, 46, 47, 50], "level": [29, 38, 40, 41, 42, 45, 46, 47, 50, 59], "percentil": 29, "precis": 29, "visual": [29, 54], "debugg": 29, "under": [30, 39, 47, 48, 54], "trigger": 30, "manual": 30, "workflow_dispatch": 30, "push": 30, "branch": 30, "pull": [30, 34, 44], "pull_request": 30, "15am": 30, "schedul": [30, 45, 47], "cron": 30, "15": [30, 38, 41, 52], "everyth": 30, "align": 30, "machin": [30, 32], "20": [30, 39, 41, 46], "term": [30, 45], "matrix": 30, "fail": [30, 54, 59], "fast": [30, 37, 47], "indic": [30, 56], "One": 30, "v3": 30, "architectur": [30, 32], "x84": 30, "zulu": 30, "shell": 30, "codecov": 30, "chmod": 30, "streampredict": [31, 35], "registermodel": [31, 39], "unregistermodel": [31, 39], "free": [31, 39, 47, 58], "scalework": [31, 39], "dynam": [31, 32, 34, 35, 39, 40, 46], "better": [31, 35, 39, 45, 47], "listmodel": [31, 39], "queri": [31, 39, 41, 58], 
"describemodel": [31, 39], "setdefault": [31, 39], "googleapi": 31, "stub": 31, "third_parti": 31, "intermedi": [31, 33, 35], "until": [31, 33, 35, 39], "forc": [31, 35], "inferenceapisservic": [31, 33, 35], "torchservehealthrespons": [31, 35], "predictionsrequest": [31, 35], "predictionrespons": [31, 35], "style": [31, 33, 35], "handler_util": [31, 33, 35], "v1": [31, 33, 35, 39], "rang": [31, 33, 35, 40], "intermediate_respons": [31, 33, 35], "success": [31, 33, 35, 38, 51], "hello": [31, 33, 35, 37], "world": [31, 33, 35], "llama": [32, 37], "inferentia2": [32, 37], "naver": 32, "studi": [32, 47], "transit": 32, "cost": [32, 46, 47], "intel": [32, 46], "oneapi": 32, "softwar": 32, "sagemak": [32, 34], "75": 32, "four": [32, 45], "tpuv5": 32, "monitor": 32, "datadog": 32, "anim": [32, 47], "draw": [32, 47], "walmart": 32, "search": 32, "grok": 32, "principl": [32, 46], "inferentia": 32, "children": 32, "life": 32, "evolut": 32, "cresta": 32, "migrat": 32, "quantit": 32, "comparison": 32, "platform": [32, 34, 48], "indepth": 32, "why": [32, 50], "best": [32, 47], "practic": 32, "improv": [32, 36, 45, 46, 47], "perfrom": 32, "view": [32, 33, 39, 41, 46, 50], "torcherv": 32, "frequent": 32, "question": 32, "correct": [33, 39, 53], "swagger": [33, 39, 49], "codegen": [33, 39, 49], "maxretrytimeoutinsec": 33, "5min": 33, "recov": 33, "dead": 33, "activ": [33, 35, 44, 45, 46, 51], "unhealthi": 33, "less": 33, "resnet": [33, 38, 42, 44, 50], "f": [33, 37, 56], "squeezenet1_1": [33, 39, 42], "dog": [33, 44, 50], "open": [33, 34, 35, 47, 53], "rb": 33, "n02123045": 33, "42514491081237793": 33, "chunk": 33, "test_echo_stream_infer": [33, 35], "start_torchserv": [33, 35], "no_config_snapshot": [33, 35], "gen_mar": [33, 35], "register_model": [33, 35], "echo_stream": [33, 35], "tf_inference_api": [33, 35], "foo": [33, 35], "assert": [33, 35], "transfer": [33, 35], "iter_cont": [33, 35], "chunk_siz": [33, 35], "append": [33, 35, 54], "unregister_model": [33, 35], "test_data": 
33, "png": [33, 44], "004570948731989492": 33, "006216969640322402": 33, "008197565423679522": 33, "009563574612830427": 33, "008999274832810742": 33, "009673474804303854": 33, "007599905146155397": 33, "kf_request_json": 33, "spec": 33, "grade": 34, "track": [34, 46, 47], "those": [34, 42, 47, 51], "weight": [34, 43, 47, 50], "compon": [34, 46, 54], "portion": 34, "lifecycl": 34, "actual": [34, 50], "script_modul": 34, "eager_mode_model": 34, "along": [34, 35, 48, 50, 56], "storag": 34, "authz": 34, "authn": 34, "drop": [34, 47], "loadabl": 34, "dockerfil": [34, 37], "experiment": 34, "k8": 34, "serving_sdk": 34, "stuff": 34, "termin": [34, 38, 39, 54], "easier": [34, 47], "kfserv": 34, "startserv": 34, "8903ca1fb059eab3c1e8eccdee1376d4ff52fb67": 34, "workerstatelisten": 34, "close": 34, "workerst": 34, "workloadmanag": 34, "concurr": [34, 45, 46, 47], "hashmap": 34, "backendgroup": 34, "threadpool": 34, "executor": 34, "pool": 34, "task": 34, "batchaggreg": 34, "apiutil": 34, "configmanag": [34, 54], "split": [35, 59], "partit": 35, "vllm": [35, 37], "speed": [35, 46, 47], "pp": 35, "accommod": 35, "torchrun": 35, "paralleltyp": 35, "tp": 35, "pptp": 35, "leav": 35, "nproc": 35, "OR": [35, 40, 56], "parallellevel": 35, "visibl": [35, 37], "suppos": 35, "eight": [35, 45], "worker1": 35, "worker2": 35, "illustr": 35, "stage": 35, "microbatch": 35, "inherit": [35, 39, 43], "our": [35, 37, 45], "custom_handl": 35, "base_pippy_handl": 35, "basepippyhandl": 35, "pt_pippi": 35, "initialize_rpc_work": 35, "get_pipline_driv": 35, "super": [35, 38, 50], "local_rank": 35, "device_count": 35, "world_siz": 35, "rout": 35, "rank0": 35, "rank": 35, "fontend": 35, "wish": [35, 51], "input_nam": 35, "input_id": 35, "fx": 35, "trace": 35, "model_typ": 35, "hf": [35, 37], "wise": 35, "blank": 35, "rpc_timeout": 35, "1800": 35, "num_worker_thread": 35, "max_length": 35, "80": 35, "length": [35, 39, 46], "bloom": 35, "pippy_handl": 35, "model_checkpoints_path": 35, "tgz": 35, 
"progress": 35, "soon": 35, "microsoft": [35, 54], "get_ds_engin": 35, "base_deepspeed_handl": 35, "basedeepspeedhandl": 35, "ds_engin": 35, "ds": 35, "filenam": [35, 38], "dtype": 35, "float16": 35, "replace_with_kernel_inject": 35, "tensor_parallel": 35, "tp_size": 35, "method1": 35, "method2": 35, "ds_build_op": 35, "deepspeed_handl": 35, "advantag": 35, "further": [35, 37, 38, 40, 41], "maxim": 35, "appli": [35, 47], "low_cpu_mem_usag": 35, "checkpoint": 35, "pretrain": [35, 56], "hub": [35, 37, 56], "cach": [35, 37, 40], "caus": 35, "omp_number_thread": 35, "immedi": [35, 39], "incur": 35, "503": 35, "chatgpt": 35, "effect": [35, 46], "action": 35, "reject": 35, "capac": 35, "busi": 35, "jobqueues": 35, "usejobticket": 35, "graviton": 36, "m7g": 36, "4x": 36, "dnnl_default_fpmath_mod": 36, "bf16": 36, "lru_cache_capac": 36, "1024": 36, "synthesi": 36, "meta": [37, 47], "llama3": 37, "besid": 37, "offer": [37, 45, 46, 48], "volum": [37, 56], "faster": [37, 43, 44, 46], "reload": [37, 41], "gate": 37, "huggingface_hub_token": 37, "ahead": [37, 40], "ti": 37, "hugging_face_hub_token": 37, "model_id": 37, "8b": 37, "instruct": [37, 41, 47, 54, 56], "disable_token": 37, "exchang": 37, "my": 37, "max_new_token": 37, "samplingparam": 37, "keyword": 37, "temperatur": 37, "renam": 37, "some_nam": [37, 40], "outsid": [37, 47], "llm_launcher": 37, "3rd": 37, "parti": 37, "team": 37, "7b": 37, "chat": 37, "mistralai": 37, "mistral": 37, "v0": [37, 38, 39], "varieti": 37, "bot": 37, "gpt": 37, "understand": [38, 46, 48, 56], "layout": 38, "familiar": 38, "log4j": [38, 50], "rollingfil": 38, "access_log": [38, 42], "env": [38, 53, 54], "filepattern": 38, "dd": 38, "mmm": 38, "patternlayout": 38, "iso8601": 38, "sizebasedtriggeringpolici": 38, "timebasedtriggeringpolici": 38, "defaultrolloverstrategi": 38, "2018": [38, 39], "13": [38, 41], "56": 38, "976": 38, "backendwork": 38, "64003": 38, "118": 38, "remot": [38, 47, 49, 50], "took": 38, "ts_log": 38, "5p": 38, "stderr": 
38, "14": 38, "46": [38, 42], "51": 38, "656": 38, "workerlifecycl": 38, "nnvm": 38, "legacy_json_util": 38, "cc": 38, "209": 38, "symbol": 38, "upgrad": 38, "657": 38, "217": [38, 44], "successfulli": [38, 56, 58], "59": 38, "926": 38, "60": [38, 53], "117": 38, "31": [38, 42], "52": 38, "dlog4j": 38, "configurationfil": 38, "altern": [38, 39], "lightweight": 38, "consid": [38, 45], "recent": 38, "lost": 38, "unexpectedli": 38, "decreas": [39, 45], "refresh": 39, "delet": [39, 42, 53, 58], "managementapisservic": 39, "subfold": [39, 58], "module_nam": 39, "method_nam": 39, "least": 39, "synchron": [39, 56], "acknowledg": 39, "onlin": 39, "response_timeout": 39, "respond": 39, "period": [39, 40], "squeezenet_v1": [39, 50], "sse": 39, "secret": 39, "region": 39, "squeezenet1": 39, "account": [39, 40], "iam": 39, "role": 39, "awss3readonlyaccess": 39, "s3_sse_km": 39, "forgot": 39, "between": [39, 45, 53], "202": 39, "4dc54158": 39, "c6de": 39, "42aa": 39, "b5dd": 39, "ebcb5f721043": 39, "47": [39, 42], "aliv": 39, "ok": 39, "ecd2e502": 39, "382f": 39, "4c3b": 39, "b425": 39, "519fbf6d3b85": 39, "89": 39, "min_work": [39, 56], "max_work": 39, "pend": 39, "exceed": 39, "flavor": 39, "42adc58": 39, "6956": 39, "4198": 39, "ad07": 39, "db6c620c4c1e": 39, "b72b1ea0": 39, "81c6": 39, "4cce": 39, "92c4": 39, "530d3cfe5d4a": 39, "63": 39, "3997ccd4": 39, "ae44": 39, "4570": 39, "b249": 39, "e361b08d3d47": 39, "77": 39, "02t13": 39, "53": 39, "034z": 39, "89247744": 39, "jobqueuestatu": 39, "remainingcapac": 39, "pendingrequest": 39, "noop_2": 39, "metadata": [39, 50], "output_describ": 39, "_is_describ": 39, "start_tim": [39, 40], "is_profiler_en": 39, "enable_torch_profil": [39, 47], "_infer_with_profil": [39, 47], "stop_tim": 39, "handlertim": [39, 40, 41], "customizedmetadata": 39, "9010": 39, "2022": 39, "08t11": 39, "974z": 39, "98972": 39, "data1": 39, "data2": 39, "next_page_token": [39, 58], "next": [39, 45, 54, 56, 58], "pagin": [39, 58], "nextpagetoken": [39, 58], 
"noop_v0": 39, "bearer": [39, 53], "expir": [39, 53], "broadli": 40, "minut": [40, 53], "metrics_mod": [40, 41], "ts_metrics_mod": 40, "agent": 40, "log_directori": 40, "ts_metric": [40, 42], "model_metr": 40, "releas": [40, 46], "ts_inference_requests_tot": [40, 41], "ts_inference_latency_microsecond": [40, 41], "ts_queue_latency_microsecond": [40, 41], "my_model": 40, "chosen": 40, "model_metrics_auto_detect": 40, "ts_model_metrics_auto_detect": 40, "cold": 40, "subsequ": 40, "often": [40, 46], "mitig": [40, 46], "terminolog": 40, "alias": 40, "nameofcountermetr": 40, "gaug": [40, 41], "nameofgaugemetr": 40, "histogram": 40, "nameofhistogrammetr": 40, "inferencetimeinm": 40, "numberofmetr": 40, "gaugemodelmetricnameexampl": 40, "histogrammodelmetricnameexampl": 40, "toward": 40, "percent": [40, 41], "metrictyp": 40, "semant": 40, "requests2xx": [40, 41, 42], "hostnam": [40, 41, 42], "total": 40, "300": 40, "requests4xx": [40, 41, 42], "400": 40, "requests5xx": [40, 41], "microsecond": [40, 41], "queuetim": [40, 41], "spent": 40, "workerthreadtim": [40, 41], "exclud": 40, "workerloadtim": [40, 41], "workernam": [40, 41], "taken": [40, 51, 56], "cpuutil": [40, 41], "memoryus": [40, 41], "megabyt": [40, 41], "memoryavail": [40, 41], "memoryutil": [40, 41], "diskusag": [40, 41], "gigabyt": [40, 41], "diskutil": [40, 41], "diskavail": [40, 41], "gpumemoryutil": [40, 41], "gpumemoryus": [40, 41], "gpuutil": [40, 41], "predictiontim": [40, 41], "confus": [40, 41], "fetch": [40, 41], "coupl": 40, "name_of_model": 40, "dim1": 40, "dim2": 40, "some_valu": 40, "dimn": 40, "name_n": 40, "value_n": 40, "add_metric_to_cach": 40, "dimension_nam": 40, "metric_typ": 40, "cachingmetr": 40, "newli": 40, "add_or_upd": 40, "dimension_valu": 40, "distance_metr": 40, "distanceinkm": 40, "distanc": 40, "inferencetim": 40, "end_tim": 40, "sizeofimag": 40, "img_siz": 40, "utilization_perc": 40, "callcount": 40, "call_count": 40, "get_metr": 40, "metricscachekeyerror": 40, "gauge_metr": 
40, "gaugemetricnam": 40, "gauge_metric_valu": 40, "88665a372f4b": 41, "ant": 41, "054508209228516": 41, "resnet18": [41, 44], "83": 41, "resnet18_1": 41, "4593": 41, "9001": 41, "4592": 41, "5829": 41, "7421875": 41, "82": 41, "93": 41, "290371": 41, "129": 41, "8245": 41, "62109375": 41, "325": 41, "05113983154297": 41, "64": 41, "globoff": 41, "yml": 41, "scrape_interv": 41, "evaluation_interv": 41, "scrape_config": 41, "job_nam": 41, "static_config": 41, "9090": 41, "navig": 41, "graph": [41, 47], "3000": 41, "systemctl": 41, "daemon": [41, 45], "abil": 42, "unintend": 42, "scenario": [42, 45, 47, 53], "ts_enable_model_api": 42, "switch": 42, "enable_model_api": 42, "At": [42, 47, 54], "explicitli": [42, 47], "unabl": 42, "05": 42, "30t21": 42, "625": 42, "epolleventloopgroup": 42, "53514": 42, "405": 42, "626": 42, "timestamp": [42, 51], "1717105563": 42, "methodnotallowedexcept": 42, "41": [42, 44], "098": 42, "36270": 42, "099": 42, "1717105307": 42, "flowchart": 43, "simplifi": [43, 48], "td": 43, "id1": 43, "id13": 43, "id2": 43, "id3": 43, "id4": 43, "id5": 43, "onnx": [43, 47], "id6": 43, "tensorrt": [43, 46, 47], "id7": 43, "id8": 43, "id9": 43, "id10": 43, "id11": 43, "id21": 43, "id20": 43, "id15": 43, "id16": 43, "id14": 43, "id22": 43, "archiev": 43, "id17": 43, "propos": 44, "inclus": 44, "thank": 44, "216": 44, "106": 44, "489": 44, "squeezenet": [44, 50], "1_1": 44, "152": 44, "214": 44, "rcnn": 44, "coco": 44, "148": 44, "person": 44, "mask": 44, "158": 44, "ag_new": 44, "169": 44, "sample_text": 44, "fcn": 44, "101": 44, "193": 44, "deeplabv3": 44, "384": 44, "386": 44, "105": 44, "42": 44, "215": 44, "mmf": 44, "recognit": [44, 50], "charad": 44, "549": 44, "372cc": 44, "mp4": 44, "mgpu": 44, "sample_text_captum_input": [44, 45], "breed": 44, "war": [44, 58, 59], "spin": 45, "thu": [45, 46, 53], "kernel": [45, 47], "drawback": [45, 47], "occupi": [45, 47], "leverag": [45, 47], "benefit": [45, 46], "smi": [45, 46, 54, 56], "exclus": 45, 
"shutdown": [45, 51], "echo": [45, 55], "quit": 45, "48": 45, "volta": 45, "lead": 45, "failur": [45, 59], "decis": 45, "repres": 45, "workload": [45, 47], "primarili": 45, "investig": 45, "evolv": 45, "g4dn": 45, "4xlarg": 45, "2xlarg": 45, "concentr": 45, "measur": 45, "ab": 45, "still": 45, "left": 45, "five": 45, "median": 45, "overwrit": [45, 47], "accordingli": 45, "huggingface_transform": 45, "bertseqclassif": 45, "10000": [45, 59], "600": 45, "seq_classification_artifact": 45, "skew": [45, 48], "therefor": 45, "neglect": 45, "figur": 45, "steadi": 45, "wors": 45, "interpret": 45, "experi": 45, "bigger": [45, 47], "With": [45, 56], "diminish": 45, "25": 45, "saw": 45, "mix": 45, "certain": 45, "suggest": [45, 46], "highli": [45, 47], "situat": 45, "sole": 45, "focus": [45, 50], "pack": [45, 50], "diagnos": 46, "nlp": 46, "pad": 46, "too": 46, "driver": 46, "oftentim": 46, "bug": 46, "overal": 46, "htop": 46, "obviou": 46, "biggest": 46, "bottleneck": [46, 47], "beyond": 46, "quantifi": 46, "slow": 46, "scope": 46, "async": 46, "hide": 46, "constant": 46, "unnecessarili": 46, "magnitud": 46, "short": [46, 50], "jpeg": 46, "libjpeg": 46, "turbo": 46, "simd": 46, "dali": [46, 47], "old": 46, "k80": 46, "factor": 46, "low": [46, 47], "hang": 46, "fruit": 46, "evalu": 46, "inference_mod": 46, "bump": 46, "fp16": 46, "doubl": 46, "newer": 46, "neglig": 46, "accuraci": 46, "degrad": 46, "technic": 46, "quantiz": [46, 47], "seldom": 46, "suffer": 46, "loss": 46, "explor": 46, "articl": [46, 47], "neural": 46, "int8": 46, "compressor": 46, "sophist": 46, "worth": 46, "signific": 46, "right": [46, 54], "approach": [46, 47], "balanc": 46, "smart": 46, "meet": 46, "sla": 46, "onnxruntim": 46, "lightseq": 46, "ctranslat": 46, "fusion": [46, 47], "distil": [46, 47], "gain": 46, "minilm": 46, "99": 46, "origin": 46, "2x": [46, 47], "sort": 46, "unnecessari": [46, 47], "exhaust": 46, "squeez": 46, "checklist": 47, "trick": 47, "prune": 47, "encourag": 47, "hard": 47, 
"easiest": 47, "ort": 47, "fastertransform": 47, "favorit": 47, "feel": 47, "pr": 47, "8x": 47, "basi": 47, "10x": 47, "regard": 47, "genai": 47, "link": [47, 50, 56], "acceler": 47, "bit": 47, "ort_sess": 47, "inferencesess": 47, "sess_opt": 47, "treat": 47, "vs": 47, "nn": 47, "transformerencod": 47, "bettertransform": 47, "exce": 47, "speedup": 47, "vari": 47, "batch_delai": 47, "lower": 47, "heavi": 47, "On": 47, "significantli": 47, "theori": 47, "discuss": [47, 50], "quickli": 47, "summar": 47, "hyperthread": 47, "affin": 47, "physic": 47, "numa": 47, "cross": 47, "ex": [47, 50, 53, 54], "contrari": 47, "clearli": 47, "sub": 47, "valuetoset": 47, "uniqu": 47, "around": 47, "portabl": 47, "iter": 47, "popular": 47, "block": 47, "helper": 47, "p50": 47, "p90": 47, "p99": 47, "visit": 47, "app": [47, 50, 54], "fine": [47, 54], "scientist": 48, "analyt": 48, "though": 48, "underli": 48, "plain": 48, "crucial": 48, "assum": [48, 56], "service_envelop": 48, "scala": 49, "javascript": 49, "snap": 50, "pictur": 50, "scene": 50, "identif": 50, "intak": 50, "littl": 50, "weed": 50, "dive": 50, "kind": [50, 56], "themselv": 50, "long": 50, "stori": 50, "ts_config": 50, "model_path1": 50, "model_path2": 50, "log_config": 50, "exit": 50, "model_loc": 50, "talk": 50, "sai": 50, "fanci": 50, "net": 50, "hotdog": 50, "sens": 50, "hot": 50, "name2": 50, "model_location2": 50, "compris": [51, 59], "cfg": 51, "shut": [51, 53], "intention": 51, "resili": 51, "katex": 52, "recommonmark": 52, "sphinxcontrib": 52, "pyyaml": 52, "theme": 52, "ts_disable_token_author": 53, "auth": 53, "disable_token_author": 53, "preced": 53, "tokenauthorizationhandl": 53, "key_fil": 53, "e5ksrm": 53, "16t21": 53, "24": 53, "801167z": 53, "gnrua7d": 53, "801148z": 53, "yv9uqajp": 53, "i_j_itmb": 53, "finhr1fj": 53, "m4m": 53, "5ibi": 53, "THE": 53, "displai": 53, "token_expiration_min": 53, "30": 53, "owner": 53, "adapt": 53, "certifi": 54, "2019": [54, 55], "admin": 54, "anaconda": 54, "powershel": 
54, "openjdk17": 54, "unzip": 54, "edit": 54, "jdk": [54, 55], "3_window": 54, "x64_bin": 54, "gui": [54, 56], "wheel": 54, "prebuilt": 54, "click": 54, "whl": 54, "java_hom": 54, "redistribut": 54, "studio": 54, "2015": 54, "2017": 54, "your_install_dr": 54, "program": 54, "corpor": 54, "nvsmi": 54, "prod": 54, "setup_wsl_ubuntu": 55, "bash": 55, "bashrc": 55, "openjdk": 55, "sentencepiec": 55, "conveni": 56, "prerequisit": 56, "chrome": 56, "default_handl": 56, "your_model_nam": 56, "your_model_fil": 56, "mv": 56, "directli": 56, "copi": 56, "your_model_store_dir": 56, "kept": 56, "min": [56, 59], "your_input_fil": 56, "demostr": 56, "public_url": [56, 58], "your_path": 56, "8433": 56, "local_dir": 56, "your_docker_imag": 56, "s3_path": 56, "notic": 56, "proerti": 56, "apart": 56, "lib": 56, "your_custom_handler_py_fil": 56, "fairseq": 56, "your_requirements_txt": 56, "anoth": 56, "wfpredict": 57, "workflow_nam": [57, 58], "myworkflow": [57, 58], "leaf": 57, "dag": [57, 58], "workflownam": 58, "workflowurl": 58, "workflowdag": 58, "m1": [58, 59], "myworkflow1": 58, "myworkflow2": 58, "ensembl": 59, "flow": 59, "m2": 59, "m3": 59, "retri": 59, "model3": 59, "m4": 59, "model4": 59, "pre_process": 59, "syntax": 59, "ridden": 59, "dataflow": 59, "eg": 59, "function1": 59, "function2": 59, "aggregate_func": 59, "workflow_inference_api": 59, "workflow_management_api": 59, "serializ": 59, "arrai": 59}, "objects": {"": [[8, 0, 0, "-", "test"], [10, 0, 0, "-", "ts"]], "ts": [[10, 0, 0, "-", "arg_parser"], [10, 0, 0, "-", "context"], [11, 0, 0, "-", "metrics"], [10, 0, 0, "-", "model_loader"], [10, 0, 0, "-", "model_server"], [12, 0, 0, "-", "model_service"], [10, 0, 0, "-", "model_service_worker"], [13, 0, 0, "-", "protocol"], [10, 0, 0, "-", "service"], [14, 0, 0, "-", "torch_handler"], [19, 0, 0, "-", "utils"], [10, 0, 0, "-", "version"]], "ts.arg_parser": [[10, 1, 1, "", "ArgParser"]], "ts.arg_parser.ArgParser": [[10, 2, 1, "", "extract_args"], [10, 2, 1, "", 
"model_service_worker_args"], [10, 2, 1, "", "ts_parser"]], "ts.context": [[10, 1, 1, "", "Context"], [10, 1, 1, "", "RequestProcessor"]], "ts.context.Context": [[10, 2, 1, "", "get_all_request_header"], [10, 2, 1, "", "get_request_header"], [10, 2, 1, "", "get_request_id"], [10, 2, 1, "", "get_response_content_type"], [10, 2, 1, "", "get_response_headers"], [10, 2, 1, "", "get_response_status"], [10, 2, 1, "", "get_sequence_id"], [10, 3, 1, "", "metrics"], [10, 3, 1, "", "request_processor"], [10, 2, 1, "", "set_all_response_status"], [10, 2, 1, "", "set_response_content_type"], [10, 2, 1, "", "set_response_header"], [10, 2, 1, "", "set_response_status"], [10, 3, 1, "", "system_properties"]], "ts.context.RequestProcessor": [[10, 2, 1, "", "add_response_property"], [10, 2, 1, "", "get_request_properties"], [10, 2, 1, "", "get_request_property"], [10, 2, 1, "", "get_response_header"], [10, 2, 1, "", "get_response_headers"], [10, 2, 1, "", "get_response_status_code"], [10, 2, 1, "", "get_response_status_phrase"], [10, 2, 1, "", "report_status"]], "ts.metrics": [[11, 0, 0, "-", "dimension"], [11, 0, 0, "-", "metric"], [11, 0, 0, "-", "metric_collector"], [11, 0, 0, "-", "metric_encoder"], [11, 0, 0, "-", "metrics_store"], [11, 0, 0, "-", "process_memory_metric"], [11, 0, 0, "-", "system_metrics"], [11, 0, 0, "-", "unit"]], "ts.metrics.dimension": [[11, 1, 1, "", "Dimension"]], "ts.metrics.dimension.Dimension": [[11, 2, 1, "", "to_dict"]], "ts.metrics.metric": [[11, 1, 1, "", "Metric"]], "ts.metrics.metric.Metric": [[11, 2, 1, "", "reset"], [11, 2, 1, "", "to_dict"], [11, 2, 1, "", "update"]], "ts.metrics.metric_encoder": [[11, 1, 1, "", "MetricEncoder"]], "ts.metrics.metric_encoder.MetricEncoder": [[11, 2, 1, "", "default"]], "ts.metrics.metrics_store": [[11, 1, 1, "", "MetricsStore"]], "ts.metrics.metrics_store.MetricsStore": [[11, 2, 1, "", "add_counter"], [11, 2, 1, "", "add_error"], [11, 2, 1, "", "add_metric"], [11, 2, 1, "", "add_percent"], [11, 2, 1, "", 
"add_size"], [11, 2, 1, "", "add_time"]], "ts.metrics.process_memory_metric": [[11, 4, 1, "", "check_process_mem_usage"], [11, 4, 1, "", "get_cpu_usage"]], "ts.metrics.system_metrics": [[11, 4, 1, "", "collect_all"], [11, 4, 1, "", "cpu_utilization"], [11, 4, 1, "", "disk_available"], [11, 4, 1, "", "disk_used"], [11, 4, 1, "", "disk_utilization"], [11, 4, 1, "", "gpu_utilization"], [11, 4, 1, "", "memory_available"], [11, 4, 1, "", "memory_used"], [11, 4, 1, "", "memory_utilization"]], "ts.metrics.unit": [[11, 1, 1, "", "Units"]], "ts.model_loader": [[10, 1, 1, "", "ModelLoader"], [10, 1, 1, "", "ModelLoaderFactory"], [10, 1, 1, "", "TsModelLoader"]], "ts.model_loader.ModelLoader": [[10, 2, 1, "", "load"]], "ts.model_loader.ModelLoaderFactory": [[10, 2, 1, "", "get_model_loader"]], "ts.model_loader.TsModelLoader": [[10, 2, 1, "", "load"]], "ts.model_server": [[10, 4, 1, "", "load_properties"], [10, 4, 1, "", "start"]], "ts.model_service": [[12, 0, 0, "-", "model_service"]], "ts.model_service.model_service": [[12, 1, 1, "", "ModelService"], [12, 1, 1, "", "SingleNodeService"]], "ts.model_service.model_service.ModelService": [[12, 2, 1, "", "handle"], [12, 2, 1, "", "inference"], [12, 2, 1, "", "initialize"], [12, 2, 1, "", "ping"], [12, 2, 1, "", "signature"]], "ts.model_service.model_service.SingleNodeService": [[12, 2, 1, "", "inference"]], "ts.model_service_worker": [[10, 1, 1, "", "TorchModelServiceWorker"]], "ts.model_service_worker.TorchModelServiceWorker": [[10, 2, 1, "", "handle_connection"], [10, 2, 1, "", "load_model"], [10, 2, 1, "", "run_server"]], "ts.protocol": [[13, 0, 0, "-", "otf_message_handler"]], "ts.protocol.otf_message_handler": [[13, 4, 1, "", "create_load_model_response"], [13, 4, 1, "", "create_predict_response"], [13, 4, 1, "", "encode_response_headers"], [13, 4, 1, "", "retrieve_msg"], [13, 4, 1, "", "send_intermediate_predict_response"]], "ts.service": [[10, 1, 1, "", "Service"], [10, 4, 1, "", "emit_metrics"]], "ts.service.Service": 
[[10, 3, 1, "", "context"], [10, 2, 1, "", "predict"], [10, 2, 1, "", "retrieve_data_for_inference"], [10, 2, 1, "", "set_cl_socket"]], "ts.torch_handler": [[14, 0, 0, "-", "base_handler"], [14, 0, 0, "-", "contractions"], [14, 0, 0, "-", "densenet_handler"], [14, 0, 0, "-", "image_classifier"], [14, 0, 0, "-", "image_segmenter"], [14, 0, 0, "-", "object_detector"], [15, 0, 0, "-", "request_envelope"], [14, 0, 0, "-", "text_classifier"], [14, 0, 0, "-", "text_handler"], [16, 0, 0, "-", "unit_tests"], [14, 0, 0, "-", "vision_handler"]], "ts.torch_handler.base_handler": [[14, 1, 1, "", "BaseHandler"], [14, 4, 1, "", "setup_ort_session"]], "ts.torch_handler.base_handler.BaseHandler": [[14, 2, 1, "", "describe_handle"], [14, 2, 1, "", "explain_handle"], [14, 2, 1, "", "get_device"], [14, 2, 1, "", "handle"], [14, 2, 1, "", "inference"], [14, 2, 1, "", "initialize"], [14, 2, 1, "", "postprocess"], [14, 2, 1, "", "preprocess"]], "ts.torch_handler.densenet_handler": [[14, 1, 1, "", "DenseNetHandler"], [14, 4, 1, "", "list_classes_from_module"]], "ts.torch_handler.densenet_handler.DenseNetHandler": [[14, 2, 1, "", "handle"], [14, 2, 1, "", "inference"], [14, 2, 1, "", "initialize"]], "ts.torch_handler.image_classifier": [[14, 1, 1, "", "ImageClassifier"]], "ts.torch_handler.image_classifier.ImageClassifier": [[14, 2, 1, "", "get_max_result_classes"], [14, 5, 1, "", "image_processing"], [14, 2, 1, "", "postprocess"], [14, 2, 1, "", "set_max_result_classes"], [14, 5, 1, "", "topk"]], "ts.torch_handler.image_segmenter": [[14, 1, 1, "", "ImageSegmenter"]], "ts.torch_handler.image_segmenter.ImageSegmenter": [[14, 5, 1, "", "image_processing"], [14, 2, 1, "", "postprocess"]], "ts.torch_handler.object_detector": [[14, 1, 1, "", "ObjectDetector"]], "ts.torch_handler.object_detector.ObjectDetector": [[14, 5, 1, "", "image_processing"], [14, 2, 1, "", "initialize"], [14, 2, 1, "", "postprocess"], [14, 5, 1, "", "threshold"]], "ts.torch_handler.request_envelope": [[15, 0, 0, "-", 
"base"], [15, 0, 0, "-", "body"], [15, 0, 0, "-", "json"], [15, 0, 0, "-", "kserve"], [15, 0, 0, "-", "kservev2"]], "ts.torch_handler.request_envelope.base": [[15, 1, 1, "", "BaseEnvelope"]], "ts.torch_handler.request_envelope.base.BaseEnvelope": [[15, 2, 1, "", "format_output"], [15, 2, 1, "", "handle"], [15, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.body": [[15, 1, 1, "", "BodyEnvelope"]], "ts.torch_handler.request_envelope.body.BodyEnvelope": [[15, 2, 1, "", "format_output"], [15, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.json": [[15, 1, 1, "", "JSONEnvelope"]], "ts.torch_handler.request_envelope.json.JSONEnvelope": [[15, 2, 1, "", "format_output"], [15, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.kserve": [[15, 1, 1, "", "KServeEnvelope"]], "ts.torch_handler.request_envelope.kserve.KServeEnvelope": [[15, 2, 1, "", "format_output"], [15, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.kservev2": [[15, 1, 1, "", "KServev2Envelope"]], "ts.torch_handler.request_envelope.kservev2.KServev2Envelope": [[15, 2, 1, "", "format_output"], [15, 2, 1, "", "parse_input"]], "ts.torch_handler.text_classifier": [[14, 1, 1, "", "TextClassifier"]], "ts.torch_handler.text_classifier.TextClassifier": [[14, 2, 1, "", "get_insights"], [14, 2, 1, "", "inference"], [14, 5, 1, "", "ngrams"], [14, 2, 1, "", "postprocess"], [14, 2, 1, "", "preprocess"]], "ts.torch_handler.text_handler": [[14, 1, 1, "", "TextHandler"]], "ts.torch_handler.text_handler.TextHandler": [[14, 2, 1, "", "get_source_vocab_path"], [14, 2, 1, "", "get_word_token"], [14, 2, 1, "", "initialize"], [14, 2, 1, "", "summarize_attributions"]], "ts.torch_handler.unit_tests": [[17, 0, 0, "-", "models"], [16, 0, 0, "-", "test_base_handler"], [16, 0, 0, "-", "test_envelopes"], [16, 0, 0, "-", "test_image_classifier"], [16, 0, 0, "-", "test_image_segmenter"], [16, 0, 0, "-", "test_object_detector"], [18, 0, 0, "-", "test_utils"]], 
"ts.torch_handler.unit_tests.models": [[17, 0, 0, "-", "base_model"]], "ts.torch_handler.unit_tests.models.base_model": [[17, 1, 1, "", "ArgmaxModel"], [17, 4, 1, "", "save_pt_file"]], "ts.torch_handler.unit_tests.models.base_model.ArgmaxModel": [[17, 2, 1, "", "forward"], [17, 5, 1, "", "training"]], "ts.torch_handler.unit_tests.test_base_handler": [[16, 4, 1, "", "handler"], [16, 4, 1, "", "test_batch_handle"], [16, 4, 1, "", "test_inference_with_profiler_works_with_custom_initialize_method"], [16, 4, 1, "", "test_single_handle"]], "ts.torch_handler.unit_tests.test_envelopes": [[16, 4, 1, "", "handle_fn"], [16, 4, 1, "", "test_binary"], [16, 4, 1, "", "test_body"], [16, 4, 1, "", "test_json"], [16, 4, 1, "", "test_json_batch"], [16, 4, 1, "", "test_json_double_batch"]], "ts.torch_handler.unit_tests.test_image_classifier": [[16, 4, 1, "", "context"], [16, 4, 1, "", "handler"], [16, 4, 1, "", "image_bytes"], [16, 4, 1, "", "model_dir"], [16, 4, 1, "", "model_name"], [16, 4, 1, "", "test_handle"], [16, 4, 1, "", "test_handle_explain"]], "ts.torch_handler.unit_tests.test_image_segmenter": [[16, 4, 1, "", "context"], [16, 4, 1, "", "handler"], [16, 4, 1, "", "image_bytes"], [16, 4, 1, "", "model_dir"], [16, 4, 1, "", "model_name"], [16, 4, 1, "", "test_handle"]], "ts.torch_handler.unit_tests.test_object_detector": [[16, 4, 1, "", "context"], [16, 4, 1, "", "handler"], [16, 4, 1, "", "image_bytes"], [16, 4, 1, "", "model_dir"], [16, 4, 1, "", "model_name"], [16, 4, 1, "", "test_handle"]], "ts.torch_handler.unit_tests.test_utils": [[18, 0, 0, "-", "mock_context"]], "ts.torch_handler.unit_tests.test_utils.mock_context": [[18, 1, 1, "", "MockContext"]], "ts.torch_handler.unit_tests.test_utils.mock_context.MockContext": [[18, 2, 1, "", "get_request_header"]], "ts.torch_handler.vision_handler": [[14, 1, 1, "", "VisionHandler"]], "ts.torch_handler.vision_handler.VisionHandler": [[14, 2, 1, "", "get_insights"], [14, 2, 1, "", "initialize"], [14, 2, 1, "", "preprocess"]], 
"ts.utils": [[19, 0, 0, "-", "timeit_decorator"], [19, 0, 0, "-", "util"]], "ts.utils.timeit_decorator": [[19, 4, 1, "", "timeit"]], "ts.utils.util": [[19, 1, 1, "", "PT2Backend"], [19, 6, 1, "", "PredictionException"], [19, 4, 1, "", "check_valid_pt2_backend"], [19, 4, 1, "", "deprecated"], [19, 4, 1, "", "get_yaml_config"], [19, 4, 1, "", "list_classes_from_module"], [19, 4, 1, "", "load_label_mapping"], [19, 4, 1, "", "map_class_to_label"]], "ts.utils.util.PT2Backend": [[19, 5, 1, "", "AOT_CUDAGRAPHS"], [19, 5, 1, "", "AOT_EAGER"], [19, 5, 1, "", "AOT_NVFUSER"], [19, 5, 1, "", "EAGER"], [19, 5, 1, "", "FX2TRT"], [19, 5, 1, "", "INDUCTOR"], [19, 5, 1, "", "IPEX"], [19, 5, 1, "", "NVFUSER"], [19, 5, 1, "", "OFI"], [19, 5, 1, "", "ONNXRT"], [19, 5, 1, "", "OPENVINO"], [19, 5, 1, "", "TORCHXLA_TRACE_ONCE"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:property", "4": "py:function", "5": "py:attribute", "6": "py:exception"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "property", "Python property"], "4": ["py", "function", "Python function"], "5": ["py", "attribute", "Python attribute"], "6": ["py", "exception", "Python exception"]}, "titleterms": {"faq": 0, "s": [0, 23, 40], "gener": [0, 38, 40, 46], "doe": 0, "torchserv": [0, 1, 3, 23, 25, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 40, 43, 45, 47, 49, 50, 51, 53, 54, 55, 56, 59], "api": [0, 3, 4, 21, 23, 25, 26, 29, 31, 33, 39, 40, 41, 42, 49, 53, 57, 58], "follow": [0, 24], "some": 0, "rest": [0, 29, 49], "standard": 0, "how": [0, 2, 3, 35, 43, 53], "us": [0, 3, 22, 23, 29, 35, 42, 54, 56], "product": [0, 54], "what": [0, 22, 34], "differ": 0, "between": 0, "python": [0, 3, 24, 25, 27, 29, 31, 34], "web": 0, "app": 0, "framework": 0, "like": 0, "flask": 0, "django": 0, "ar": [0, 3], "ani": 0, "sampl": 0, "model": [0, 3, 17, 23, 24, 25, 27, 29, 35, 37, 39, 40, 42, 43, 44, 46, 50, 51, 56, 
59], "avail": 0, "support": [0, 2, 22, 23, 37], "other": [0, 25, 37], "base": [0, 15, 29, 40], "program": 0, "languag": 0, "than": 0, "benefit": 0, "have": [0, 3], "over": 0, "aw": 0, "multi": 0, "server": [0, 31, 35, 41], "decod": 0, "intern": [0, 34], "infer": [0, 3, 23, 28, 33, 35, 46, 57], "respons": [0, 35], "client": [0, 29, 31], "side": [0, 31, 35], "perform": [0, 47], "do": [0, 2, 34, 36], "i": [0, 3], "improv": 0, "cpu": 0, "deploy": [0, 3, 37], "config": [0, 3, 23, 25, 35, 38], "can": [0, 3], "run": [0, 24, 45, 50], "port": [0, 3, 25], "default": [0, 1, 23, 27, 28, 39, 40, 42], "8080": [0, 3], "8081": [0, 3], "resolv": [0, 3], "specif": [0, 3, 25, 27, 59], "depend": [0, 3, 24, 27, 56], "deploi": [0, 37, 56], "kubernet": 0, "elb": 0, "asg": 0, "backup": 0, "restor": 0, "state": 0, "build": [0, 24], "imag": 0, "from": [0, 27, 29, 54, 55], "sourc": [0, 54, 55], "branch": [0, 24], "commit": 0, "id": 0, "creat": [0, 27, 30, 40], "dockerfil": 0, "dev": 0, "order": 0, "properti": [0, 23, 25, 38, 59], "path": 0, "model_stor": 0, "load_model": 0, "curl": [0, 33, 57], "make": 0, "request": [0, 3, 27, 48], "add": [0, 3, 40], "custom": [0, 3, 25, 27, 38, 40, 50, 56], "an": [0, 27], "exist": 0, "pass": 0, "multipl": [0, 27, 50], "call": 0, "my": [0, 3], "handler": [0, 1, 23, 27, 28, 29, 59], "return": [0, 27], "output": 0, "enhanc": 0, "alwai": 0, "write": [0, 27], "ones": 0, "Is": 0, "possibl": 0, "hug": [0, 35], "face": [0, 35], "archiv": [0, 3, 23, 24, 27, 29], "mar": [0, 3], "file": [0, 3, 25, 34, 35, 50, 59], "docker": [0, 23], "contain": [0, 23], "serial": 0, "singl": 0, "download": 0, "regist": [0, 3, 39, 58], "s3": 0, "presign": 0, "v4": 0, "url": 0, "host": 0, "set": [0, 39, 42, 53], "batch": [0, 23], "size": [0, 40], "sagemak": 0, "kei": 0, "paramet": [0, 25], "tune": [0, 35], "why": 0, "initi": 0, "so": 0, "slow": 0, "announc": 1, "secur": [1, 2, 56], "chang": [1, 3], "basic": 1, "featur": [1, 3, 28, 50], "exampl": [1, 22, 25, 31, 33, 36, 42, 57], "advanc": 
[1, 25, 27, 50], "polici": 2, "version": [2, 10, 39], "we": 2, "import": [2, 34], "guidelin": 2, "report": 2, "vulner": 2, "troubleshoot": [3, 54], "guid": [3, 47], "issu": [3, 52, 59], "fail": 3, "bind": 3, "address": [3, 25], "http": [3, 35, 52], "127": 3, "0": [3, 55], "1": [3, 35], "alreadi": 3, "java": [3, 34], "lang": 3, "nosuchmethoderror": 3, "when": 3, "start": [3, 27, 29, 40], "473": 3, "unabl": 3, "send": 3, "big": 3, "snapshot": [3, 51], "relat": [3, 59], "disabl": [3, 53], "stop": [3, 29], "after": 3, "restart": 3, "invalidsnapshotexcept": 3, "except": 3, "where": 3, "store": [3, 29], "temp": 3, "directori": [3, 34], "conflictstatusexcept": 3, "error": [3, 27], "code": [3, 24, 27, 29], "409": 3, "downloadmodelexcept": 3, "400": 3, "modelnotfoundexcept": 3, "404": 3, "serviceunavailableexcept": 3, "503": 3, "ad": 3, "requir": 3, "txt": 3, "packag": [3, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 24, 25], "list": [3, 39, 58], "get": [3, 29, 40], "instal": [3, 24, 27, 29, 36, 54, 55], "backend": [3, 24, 25, 34, 40], "worker": [3, 25, 39], "monitor": 3, "thread": 3, "interrupt": 3, "process": 3, "di": 3, "develop": [4, 26, 54], "serv": [5, 29, 34, 35, 39, 50, 56], "run_circleci_test": 6, "modul": [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 27], "setup": [7, 23, 55], "test": [8, 22, 56], "submodul": [8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], "regression_test": 8, "content": [8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 23, 26, 27, 40, 50, 54, 55], "torchserve_san": 9, "ts": [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 24], "subpackag": [10, 14, 16], "arg_pars": 10, "context": 10, "model_load": 10, "model_serv": 10, "model_service_work": 10, "servic": [10, 21, 26, 27, 50], "metric": [11, 25, 40, 41, 50], "dimens": [11, 40], "metric_collector": 11, "metric_encod": 11, "metrics_stor": 11, "process_memory_metr": 11, "system_metr": 11, "unit": 11, "model_servic": 12, "protocol": 13, "otf_message_handl": 13, "torch_handl": [14, 15, 16, 17, 
18], "base_handl": 14, "contract": 14, "densenet_handl": 14, "image_classifi": [14, 28], "image_segment": [14, 28], "object_detector": [14, 28], "text_classifi": [14, 28], "text_handl": 14, "vision_handl": 14, "request_envelop": 15, "bodi": 15, "json": [15, 28], "kserv": [15, 33], "kservev2": 15, "unit_test": [16, 17, 18], "test_base_handl": 16, "test_envelop": 16, "test_image_classifi": 16, "test_image_segment": 16, "test_mnist_kf": 16, "test_object_detector": 16, "base_model": 17, "test_util": 18, "mock_context": 18, "util": [19, 20], "timeit_decor": 19, "ts_script": 20, "api_util": 20, "backend_util": 20, "frontend_util": 20, "install_depend": 20, "install_from_src": 20, "marsgen": 20, "modelarchiver_util": 20, "print_env_info": 20, "regression_util": 20, "sanity_util": 20, "shell_util": 20, "torchserve_grpc_cli": 20, "tsutil": 20, "validate_model_on_gpu": 20, "workflow_archiver_util": 20, "appl": 22, "silicon": 22, "experiment": [22, 36], "resnet": [22, 23], "18": [22, 55], "mp": [22, 45], "On": 22, "mac": 22, "m1": 22, "pro": 22, "conda": 22, "thi": [23, 27, 50, 54, 55], "document": [23, 27, 50, 54, 55], "introduct": [23, 40], "prerequisit": [23, 38, 54], "152": 23, "configur": [23, 25, 40, 53], "demo": 23, "torch": [23, 29], "manag": [23, 39, 58], "through": [23, 29], "coverag": 24, "To": [24, 36], "check": [24, 33], "stabil": 24, "saniti": 24, "suit": 24, "frontend": [24, 34, 40], "command": [24, 25, 50, 54], "pytest": 24, "lint": 24, "IT": 24, "markdown": [24, 52], "link": 24, "checker": 24, "environ": 25, "variabl": 25, "line": [25, 42, 50], "jvm": 25, "option": 25, "load": [25, 35, 43], "startup": 25, "listen": 25, "grpc": [25, 29, 31, 35], "max": 25, "connect": 25, "ag": 25, "enabl": [25, 38, 42], "ssl": 25, "cross": 25, "origin": 25, "resourc": [25, 47], "share": 25, "cor": 25, "prefer": 25, "direct": 25, "buffer": 25, "allow": 25, "restrict": 25, "access": [25, 38], "limit": 25, "gpu": [25, 27, 56], "usag": [25, 48, 54], "nvidia": [25, 45], "control": 
[25, 42], "visibl": 25, "basehandl": 27, "level": 27, "entri": 27, "point": 27, "class": 27, "scratch": 27, "predict": [27, 29, 33, 57], "explan": [27, 33], "captum": 27, "extend": 27, "handl": 27, "execut": 27, "common": 28, "index_to_nam": 28, "contribut": [28, 29, 48], "For": [29, 54], "debian": 29, "system": [29, 46], "maco": 29, "window": [29, 54, 55], "inspect": 29, "log": [29, 38, 40, 50], "debug": 29, "github": [30, 52], "action": 30, "step": 30, "stream": [31, 35], "descript": [33, 39], "health": 33, "architectur": 34, "terminolog": 34, "pytorch": [34, 35, 47, 56], "thei": 34, "core": 34, "engin": 34, "larg": 35, "work": 35, "pippi": 35, "nativ": 35, "solut": 35, "deepspe": 35, "mii": 35, "acceler": 35, "tip": 35, "reduc": 35, "latenc": 35, "yaml": 35, "sensit": 35, "applic": 35, "job": 35, "ticket": 35, "via": 35, "chunk": 35, "encod": 35, "linux": [36, 55], "aarch64": 36, "optim": [36, 46, 47], "llm": 37, "quickstart": 37, "wai": [37, 42], "type": [38, 40], "modifi": 38, "behavior": 38, "provid": 38, "asynchron": 38, "encrypt": 39, "scale": 39, "describ": [39, 58], "unregist": [39, 58], "token": [39, 53], "author": [39, 53], "mode": [40, 42, 56], "prometheu": [40, 41], "legaci": 40, "auto": 40, "detect": 40, "format": 40, "object": 40, "function": 40, "without": 40, "time": 40, "percentag": 40, "counter": 40, "A": 40, "grafana": 41, "three": 42, "cmd": 42, "zoo": [44, 56], "benchmark": [45, 47], "g4": 45, "instanc": 45, "p3": 45, "summari": 45, "checklist": 46, "profil": 47, "more": 47, "envelop": 48, "overview": 50, "technic": 50, "detail": 50, "interfac": 50, "argument": 50, "prioriti": 50, "cf": 52, "com": 52, "ryanfox": 52, "sphinx": 52, "tabl": 52, "36": 52, "note": 53, "binari": [54, 55], "below": 54, "purpos": 54, "subsystem": 55, "wsl": 55, "ubuntu": 55, "4": 55, "case": 56, "eager": 56, "script": 56, "readymad": 56, "third": 56, "parti": 56, "ab": 56, "workflow": [57, 58, 59], "dag": 59, "sequenti": 59, "parallel": 59, "doc": 59, "known": 59}, 
"envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["FAQs", "README", "Security", "Troubleshooting", "api/dev_api", "api/modules", "api/run_circleci_tests", "api/setup", "api/test", "api/torchserve_sanity", "api/ts", "api/ts.metrics", "api/ts.model_service", "api/ts.protocol", "api/ts.torch_handler", "api/ts.torch_handler.request_envelope", "api/ts.torch_handler.unit_tests", "api/ts.torch_handler.unit_tests.models", "api/ts.torch_handler.unit_tests.test_utils", "api/ts.utils", "api/ts_scripts", "apis", "apple_silicon_support", "batch_inference_with_ts", "code_coverage", "configuration", "contents", "custom_service", "default_handlers", "getting_started", "github_actions", "grpc_api", "index", "inference_api", "internals", "large_model_inference", "linux_aarch64", "llm_deployment", "logging", "management_api", "metrics", "metrics_api", "model_api_control", "model_loading", "model_zoo", "nvidia_mps", "performance_checklist", "performance_guide", "request_envelopes", "rest_api", "server", "snapshot", "sphinx/requirements", "token_authorization_api", "torchserve_on_win_native", "torchserve_on_wsl", "use_cases", "workflow_inference_api", "workflow_management_api", "workflows"], "filenames": ["FAQs.md", "README.md", "Security.md", "Troubleshooting.md", "api/dev_api.rst", "api/modules.rst", "api/run_circleci_tests.rst", "api/setup.rst", "api/test.rst", "api/torchserve_sanity.rst", "api/ts.rst", "api/ts.metrics.rst", "api/ts.model_service.rst", "api/ts.protocol.rst", "api/ts.torch_handler.rst", "api/ts.torch_handler.request_envelope.rst", "api/ts.torch_handler.unit_tests.rst", 
"api/ts.torch_handler.unit_tests.models.rst", "api/ts.torch_handler.unit_tests.test_utils.rst", "api/ts.utils.rst", "api/ts_scripts.rst", "apis.rst", "apple_silicon_support.md", "batch_inference_with_ts.md", "code_coverage.md", "configuration.md", "contents.rst", "custom_service.md", "default_handlers.md", "getting_started.md", "github_actions.md", "grpc_api.md", "index.rst", "inference_api.md", "internals.md", "large_model_inference.md", "linux_aarch64.md", "llm_deployment.md", "logging.md", "management_api.md", "metrics.md", "metrics_api.md", "model_api_control.md", "model_loading.md", "model_zoo.md", "nvidia_mps.md", "performance_checklist.md", "performance_guide.md", "request_envelopes.md", "rest_api.md", "server.md", "snapshot.md", "sphinx/requirements.txt", "token_authorization_api.md", "torchserve_on_win_native.md", "torchserve_on_wsl.md", "use_cases.md", "workflow_inference_api.md", "workflow_management_api.md", "workflows.md"], "titles": ["FAQ\u2019S", "\u2757ANNOUNCEMENT: Security Changes\u2757", "Security Policy", "Troubleshooting Guide", "<no title>", "serve", "run_circleci_tests module", "setup module", "test package", "torchserve_sanity module", "ts package", "ts.metrics package", "ts.model_service package", "ts.protocol package", "ts.torch_handler package", "ts.torch_handler.request_envelope package", "ts.torch_handler.unit_tests package", "ts.torch_handler.unit_tests.models package", "ts.torch_handler.unit_tests.test_utils package", "ts.utils package", "ts_scripts package", "<no title>", "Apple Silicon Support", "Batch Inference with TorchServe", "Code Coverage", "Advanced configuration", "<no title>", "Custom Service", "TorchServe default inference handlers", "Getting started", "GitHub Actions for TorchServe", "TorchServe gRPC API", "TorchServe", "Inference API", "TorchServe internals", "Serving large models with Torchserve", "TorchServe on linux aarch64 - Experimental", "LLM Deployment with TorchServe", "Logging in Torchserve", "Management API", 
"TorchServe Metrics", "Metrics API", "Model API Control", "How to load a model in TorchServe", "Model Zoo", "Running TorchServe with NVIDIA MPS", "Model Inference Optimization Checklist", "Performance Guide", "Request Envelopes", "TorchServe REST API", "Running TorchServe", "TorchServe model snapshot", "cf. https://github.com/ryanfox/sphinx-markdown-tables/issues/36", "TorchServe token authorization API", "TorchServe on Windows", "TorchServe on Windows Subsystem for Linux (WSL)", "Torchserve Use Cases", "Workflow Inference API", "Management API", "TorchServe Workflows"], "terms": {"content": [0, 3, 4, 5, 25, 33, 37, 39, 47], "thi": [0, 1, 2, 3, 10, 11, 14, 15, 17, 19, 22, 24, 25, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 51, 53, 56, 58, 59], "document": [0, 1, 2, 22, 29, 31, 33, 34, 35, 37, 38, 39, 42, 45, 56], "relev": [0, 3, 10, 34, 59], "readm": [0, 10, 23], "compliant": [0, 49], "openapi": [0, 33, 39, 49], "3": [0, 22, 23, 25, 29, 30, 31, 33, 35, 36, 37, 38, 39, 41, 42, 44, 45, 49, 52, 53, 59], "0": [0, 2, 10, 11, 14, 15, 18, 22, 23, 25, 27, 28, 29, 31, 33, 35, 37, 38, 39, 40, 41, 42, 44, 47, 49, 52, 53, 54, 56], "your": [0, 1, 2, 3, 23, 25, 27, 28, 29, 32, 35, 37, 38, 39, 43, 45, 46, 47, 48, 50, 54, 56], "case": [0, 3, 11, 16, 25, 26, 27, 29, 31, 32, 33, 35, 40, 45, 46, 47, 50, 59], "you": [0, 1, 2, 3, 23, 25, 27, 28, 29, 33, 35, 36, 37, 38, 39, 40, 41, 43, 46, 47, 48, 49, 50, 51, 54, 56, 58], "abl": [0, 27, 35, 37, 47, 53, 54, 56], "mechan": [0, 25], "standalon": [0, 25, 37], "refer": [0, 1, 3, 23, 27, 29, 31, 34, 35, 40, 41, 42, 44, 45, 47, 50, 54, 56, 59], "cloud": [0, 25, 32, 34, 48], "cloudform": 0, "main": [0, 22, 23, 29, 31, 34, 47, 53, 54], "purpos": [0, 38], "serv": [0, 1, 4, 14, 15, 21, 22, 23, 24, 25, 26, 27, 31, 32, 33, 37, 38, 41, 42, 44, 45, 47, 48, 53, 54, 55, 57, 59], "via": [0, 1, 25, 27, 33, 40, 47, 50], "http": [0, 2, 10, 15, 22, 23, 25, 29, 30, 31, 33, 34, 37, 38, 39, 40, 41, 42, 50, 53, 54, 55, 56, 57, 58], 
"netti": [0, 22, 25], "engin": [0, 27, 39, 46, 56], "issu": [0, 46], "581": 0, "569": 0, "variou": [0, 14, 32, 43, 47], "provid": [0, 3, 10, 14, 23, 25, 27, 28, 31, 34, 35, 37, 39, 40, 45, 46, 47, 48, 50, 53, 58, 59], "out": [0, 15, 23, 25, 27, 28, 29, 37, 39, 46, 47, 50], "box": [0, 1, 14, 23, 28, 47], "checkout": [0, 30], "zoo": [0, 1, 26, 31], "list": [0, 1, 10, 11, 12, 14, 15, 19, 21, 22, 25, 26, 27, 28, 31, 33, 34, 35, 37, 40, 44, 48, 59], "all": [0, 2, 10, 11, 12, 14, 15, 17, 18, 19, 22, 23, 24, 25, 27, 28, 29, 32, 33, 34, 35, 37, 38, 39, 46, 47, 50, 54, 56, 58, 59], "also": [0, 3, 11, 14, 15, 25, 27, 28, 29, 31, 34, 35, 36, 37, 38, 40, 47, 50, 53, 54, 56], "check": [0, 1, 2, 3, 21, 22, 26, 28, 29, 31, 35, 39, 40, 46, 53, 56], "exampl": [0, 3, 16, 21, 23, 26, 27, 28, 29, 30, 32, 34, 35, 37, 38, 39, 40, 44, 45, 46, 47, 48, 50, 53, 56, 59], "folder": [0, 11, 25, 29, 34, 39, 50, 58], "No": [0, 25, 28, 43], "As": [0, 2, 27, 28, 29, 38, 45, 46], "now": [0, 1, 22, 23, 25, 29, 39, 42, 50, 53, 56], "onli": [0, 11, 17, 25, 27, 28, 29, 31, 32, 33, 35, 39, 40, 41, 42, 45, 46, 47, 48, 49, 50, 53, 54, 57, 58, 59], "deriv": [0, 15, 27, 28], "howev": [0, 25, 41, 53, 54, 56], "pytorch": [0, 1, 3, 14, 22, 23, 25, 27, 29, 31, 32, 33, 38, 39, 42, 43, 44, 46, 50, 52, 53, 54, 55, 57, 59], "It": [0, 12, 14, 15, 19, 24, 27, 28, 33, 35, 39, 41, 45, 46, 47, 50, 56, 58, 59], "ha": [0, 25, 27, 34, 35, 36, 39, 42, 43, 45, 47, 48, 50, 54, 56, 58], "new": [0, 22, 27, 28, 30, 31, 33, 35, 39, 40, 48, 53, 56], "featur": [0, 22, 23, 33, 35, 42, 51, 53], "snapshot": [0, 1, 22, 26, 50, 54, 59], "version": [0, 3, 4, 5, 19, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 33, 34, 35, 38, 42, 46, 50, 51, 56, 59], "By": [0, 2, 3, 25, 27, 31, 35, 40, 41, 49, 54], "utf": [0, 25, 33, 35], "8": [0, 23, 25, 29, 30, 33, 35, 38, 41, 44, 45, 58], "encod": [0, 2, 10, 11, 33, 34, 47], "string": [0, 1, 10, 12, 14, 25, 28, 40, 59], "If": [0, 2, 3, 23, 25, 27, 29, 33, 34, 35, 37, 38, 39, 40, 46, 47, 50, 51, 54, 56, 58], 
"convert": [0, 2, 14, 15, 25, 47], "byte": [0, 3, 25, 57, 59], "need": [0, 14, 17, 23, 24, 27, 34, 35, 37, 39, 40, 45, 47, 48, 54, 59], "codec": [0, 13], "specifi": [0, 3, 14, 15, 25, 27, 29, 30, 35, 39, 40, 42, 43, 50, 51, 54, 58], "github": [0, 2, 22, 23, 29, 31, 34, 47, 54, 55, 56], "com": [0, 2, 23, 25, 29, 31, 33, 34, 41, 54, 55, 56, 57], "blob": [0, 34, 56], "master": [0, 23, 29, 30, 33, 34, 55, 56, 57], "nmt": [0, 56], "_": [0, 3, 14, 23, 25, 27, 28, 31, 33, 34, 35, 37, 38, 39, 40, 41, 46, 48, 50, 52, 53, 54, 56, 58], "transform": [0, 23, 27, 35, 45, 46, 47], "py": [0, 2, 3, 14, 18, 24, 25, 27, 28, 29, 30, 31, 34, 35, 36, 45, 47, 48, 54, 55, 56], "guid": [0, 2, 26, 29, 32, 37, 55], "heavili": 0, "influenc": [0, 46], "launcher": [0, 37, 47], "core": [0, 46, 47], "pin": [0, 46, 47], "we": [0, 3, 23, 28, 29, 35, 37, 38, 45, 47, 50, 56], "recommend": [0, 2, 28, 31, 33, 35], "cpu_launcher_en": [0, 47], "true": [0, 2, 3, 10, 11, 14, 22, 23, 25, 27, 33, 35, 38, 39, 40, 42, 47, 53, 56], "cpu_launcher_arg": [0, 47], "use_logical_cor": [0, 47], "more": [0, 1, 2, 3, 16, 23, 25, 27, 28, 29, 33, 35, 36, 39, 41, 44, 45, 46, 50, 56, 59], "background": [0, 25], "found": [0, 36, 40, 41, 47], "blog": [0, 36, 46, 47], "post": [0, 3, 12, 14, 23, 25, 27, 33, 34, 35, 37, 38, 39, 42, 46, 47, 54, 56, 57, 58], "configur": [0, 1, 2, 3, 10, 26, 31, 33, 35, 38, 39, 41, 42, 47, 49, 50, 51, 56, 57, 58], "ye": [0, 43], "environ": [0, 2, 3, 24, 30, 35, 39, 40, 42, 45, 47, 51, 53, 54, 55, 56], "variabl": [0, 2, 3, 35, 39, 40, 42, 47, 51, 53, 54], "detail": [0, 2, 3, 25, 27, 28, 31, 33, 35, 36, 39, 40, 41, 44, 45, 47, 54, 56, 58, 59], "requir": [0, 2, 23, 25, 27, 29, 33, 34, 35, 38, 39, 43, 46, 47, 48, 50, 53, 56, 59], "txt": [0, 25, 34, 35, 44, 45, 56], "while": [0, 3, 17, 25, 34, 35, 45, 46, 47, 50, 51, 56], "r": [0, 3, 35], "flag": [0, 3, 23, 25, 40, 51, 53, 56], "extra": [0, 3, 25, 27, 29, 35, 47, 54, 56], "helm": [0, 1], "chart": [0, 1], "node": [0, 24, 25, 35, 54, 56, 57, 59], "ec2": 
[0, 39], "cluster": [0, 34], "There": [0, 3, 25, 35, 38, 39, 43, 47, 50, 53], "format": [0, 1, 10, 14, 15, 25, 27, 33, 35, 39, 41, 46, 47, 48, 50], "templat": 0, "here": [0, 11, 15, 23, 27, 28, 29, 34, 35, 37, 39, 40, 41, 47, 50, 54, 56], "type": [0, 2, 3, 10, 11, 12, 14, 15, 25, 27, 31, 33, 35, 37, 39, 41, 42, 43, 44, 45, 46, 50, 53, 59], "behind": [0, 47, 48], "elast": 0, "loadbalanc": 0, "preserv": [0, 51], "runtim": [0, 2, 10, 14, 23, 27, 31, 39, 47, 51, 58], "across": [0, 14, 48, 51], "session": [0, 51], "instanc": [0, 17, 27, 29, 35, 36, 37, 39, 51, 53], "experienc": [0, 51], "either": [0, 2, 3, 25, 27, 34, 35, 37, 39, 40, 47, 51, 53], "plan": [0, 27, 29, 51, 54, 56], "unplan": [0, 51], "servic": [0, 1, 3, 4, 5, 12, 14, 19, 23, 25, 31, 34, 35, 38, 39, 45, 47, 49, 51], "stop": [0, 33, 34, 42, 50, 51], "its": [0, 3, 27, 35, 37, 45, 47, 51], "upon": [0, 51], "restart": [0, 40, 51, 54], "These": [0, 1, 2, 3, 23, 25, 34, 38, 39, 46, 47, 48, 50, 56, 59], "save": [0, 31, 32, 34, 38, 51, 56], "util": [0, 2, 4, 5, 10, 14, 23, 26, 27, 31, 33, 34, 35, 40, 45, 46, 47, 50, 54, 59], "script": [0, 2, 3, 11, 25, 27, 30, 35, 37, 39, 44, 47, 54], "hardwar": [0, 2, 22, 35, 45, 46, 47], "gpu": [0, 1, 10, 11, 12, 22, 23, 24, 28, 29, 30, 32, 34, 35, 37, 39, 40, 45, 46, 47, 55], "compat": [0, 12, 14, 34, 40, 46, 47], "A": [0, 1, 2, 10, 12, 14, 15, 22, 25, 27, 33, 34, 39, 47, 48, 50, 51, 54, 56, 59], "could": [0, 31, 33, 34, 35, 38, 40, 45, 46], "cuda": [0, 24, 25, 27, 29, 30, 35, 37, 45, 47, 55], "well": [0, 25, 37, 38, 45, 46, 48, 56], "build_imag": [0, 23], "sh": [0, 23, 39], "appropri": [0, 45, 53], "option": [0, 10, 27, 29, 33, 35, 37, 39, 40, 42, 43, 46, 50, 56, 58], "help": [0, 35, 41, 45, 46, 47, 50, 54, 56], "To": [0, 3, 22, 23, 25, 27, 28, 29, 31, 33, 35, 37, 38, 39, 40, 41, 44, 45, 47, 48, 49, 50, 57, 58], "command": [0, 10, 22, 23, 29, 30, 31, 33, 35, 37, 39, 41, 42, 45, 53, 56], "b": [0, 1, 10, 40, 48, 50, 53, 56], "branch_nam": 0, "commit_id": 0, "tag": [0, 14], "t": 
[0, 14, 22, 23, 25, 28, 29, 33, 34, 37, 38, 39, 53, 56, 57], "tagnam": 0, "latest": [0, 23, 30, 46, 56], "The": [0, 2, 3, 10, 14, 15, 19, 23, 24, 25, 27, 29, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49, 50, 51, 53, 54, 56, 57, 58, 59], "instal": [0, 1, 22, 23, 25, 30, 31, 32, 34, 35, 37, 41, 56], "where": [0, 14, 25, 30, 31, 33, 34, 35, 40, 42, 45, 46, 47, 50, 51, 53], "pypi": [0, 2, 34, 36, 54], "distribut": [0, 27, 30, 35], "look": [0, 23, 35, 37, 39, 46, 47, 48, 50], "accord": [0, 45], "doc": [0, 23, 29, 33, 34, 35, 40, 41, 56, 57], "overrid": [0, 11, 14, 25, 27, 39, 40, 50, 59], "store": [0, 10, 22, 23, 25, 31, 34, 40, 42, 50, 51, 53, 54, 56], "load": [0, 1, 2, 10, 12, 13, 14, 16, 18, 19, 22, 27, 31, 33, 34, 37, 38, 39, 40, 42, 45, 47, 50, 53, 56], "mandatori": [0, 50], "argument": [0, 10, 15, 25, 27, 37, 40], "dure": [0, 10, 25, 27, 29, 35, 39, 40], "start": [0, 1, 2, 10, 11, 22, 23, 25, 30, 31, 32, 33, 34, 35, 37, 38, 39, 41, 42, 43, 45, 46, 47, 49, 50, 51, 53, 54, 56], "defin": [0, 10, 11, 12, 14, 17, 19, 25, 27, 29, 30, 33, 35, 38, 40, 47, 50, 59], "overridden": [0, 17, 25], "line": [0, 10, 39, 40, 53, 54], "manag": [0, 1, 21, 22, 25, 26, 27, 29, 31, 34, 48, 49, 51, 53, 54, 56, 59], "decid": 0, "which": [0, 3, 11, 12, 19, 23, 25, 27, 29, 30, 33, 34, 35, 37, 40, 42, 45, 47, 48, 50, 51, 53, 54, 56, 59], "relationship": 0, "w": [0, 1, 14, 28, 38, 41], "ie": [0, 27, 35], "tool": [0, 1, 2, 27, 29, 31, 32, 34, 35, 45, 46, 56], "postman": [0, 56], "insomnia": 0, "even": [0, 46], "find": [0, 2, 19, 27, 34, 35, 39, 45, 46, 47], "plugin": [0, 2, 22, 34, 56], "sdk": [0, 34], "data": [0, 1, 2, 10, 12, 14, 15, 27, 29, 31, 33, 34, 35, 37, 39, 46, 47, 48, 56, 59], "valu": [0, 2, 3, 10, 11, 19, 25, 39, 40, 45, 47, 50, 54, 58, 59], "pair": [0, 11, 40, 50], "object": [0, 1, 10, 11, 12, 14, 15, 18, 25, 27, 28, 29, 34, 39, 44, 50, 56, 57, 59], "would": [0, 14, 25, 29, 30, 35, 40, 46, 50, 59], "modifi": [0, 11, 35, 47, 50, 53], "postprocess": [0, 12, 14, 27, 35, 
38, 39, 58, 59], "extend": [0, 2, 12, 34], "just": [0, 29, 34, 37, 46], "method": [0, 10, 14, 15, 19, 23, 27, 37, 40, 42, 43, 46, 47], "code": [0, 1, 2, 10, 12, 13, 23, 25, 26, 33, 34, 35, 38, 39, 40, 42, 47, 48, 49, 50, 54], "zero": 0, "builtin": 0, "huggingfac": [0, 1, 27, 35, 37, 45], "zip": [0, 3, 25, 27, 50, 54], "consist": [0, 23, 24, 40, 45, 51, 59], "artifact": [0, 1, 14, 27, 34, 39, 43], "extens": [0, 24, 46, 50, 56], "cmd": [0, 3, 25, 53], "torch": [0, 3, 14, 22, 27, 35, 36, 39, 41, 46, 47, 48, 52, 54, 55, 56, 59], "step": [0, 3, 22, 27, 28, 29, 32, 39, 46, 54, 56], "given": [0, 3, 10, 19, 37, 39, 40, 41, 54, 56, 59], "current": [0, 10, 24, 25, 29, 31, 36, 37, 38, 39, 40, 48, 50, 51, 53, 58], "allow": [0, 2, 3, 22, 27, 29, 31, 33, 35, 37, 39, 42, 45, 47, 48, 53, 56, 58], "suppli": [0, 25, 27, 51, 58, 59], "one": [0, 17, 25, 28, 30, 31, 33, 35, 39, 40, 45, 48, 50, 53], "number": [0, 1, 3, 14, 22, 23, 25, 27, 28, 29, 31, 33, 34, 35, 37, 39, 40, 45, 47, 54, 56, 58, 59], "model_dir": [0, 10, 12, 16, 18, 27, 35], "locat": [0, 3, 23, 25, 27, 38, 39, 40, 50, 58], "access": [0, 2, 3, 33, 35, 37, 39, 40, 41, 45, 47, 49, 53, 56, 57, 58], "through": [0, 10, 14, 25, 35, 37, 42, 46, 47, 50, 53, 59], "context": [0, 4, 5, 12, 13, 14, 15, 16, 18, 25, 26, 27, 31, 33, 34, 35, 39, 40, 45, 46, 47, 59], "entri": [0, 10, 14, 25, 31, 35, 39, 50, 56], "point": [0, 10, 11, 14, 29, 31, 35, 39, 41, 45, 50], "snippet": [0, 27], "system_properti": [0, 10, 27], "get": [0, 1, 12, 14, 15, 19, 23, 24, 25, 27, 31, 33, 34, 35, 36, 39, 42, 45, 47, 50, 55, 57, 58], "cli": [0, 3, 34, 46], "633": 0, "both": [0, 1, 23, 25, 29, 31, 34, 39, 40, 45, 47, 48, 49, 56], "v2": [0, 15, 39], "signatur": [0, 12, 27, 48, 50], "note": [0, 24, 25, 27, 28, 29, 31, 33, 35, 37, 39, 40, 41, 42, 45, 46, 47, 51, 54, 56], "For": [0, 22, 23, 24, 25, 27, 28, 31, 33, 35, 37, 38, 39, 40, 41, 45, 46, 47, 48, 50, 51, 56], "replac": [0, 15, 19, 31, 33, 35, 39, 47, 53], "charact": [0, 2, 14], "e": [0, 2, 25, 33, 35, 37, 
38, 39, 40, 46, 50, 52, 54, 56, 59], "26": 0, "669": 0, "local": [0, 23, 24, 25, 34, 39, 48, 50, 54, 55, 56, 58, 59], "publicli": [0, 2, 3], "uri": [0, 39, 58, 59], "work": [0, 22, 23, 25, 27, 32, 36, 39, 43, 45, 46, 47, 53, 54, 56], "veri": [0, 35], "same": [0, 25, 27, 31, 35, 39, 45, 46, 48, 56], "made": [0, 14, 38, 39, 40, 50, 56], "public": [0, 56, 59], "consol": 0, "instead": [0, 17], "few": [0, 47, 54, 59], "reason": [0, 25, 47], "overhead": [0, 40, 47], "someth": [0, 37], "dramat": [0, 47], "larger": [0, 33, 46, 47], "launch": [0, 23, 37, 59], "control": [0, 1, 29, 34, 39, 45, 50], "dep": [0, 3], "per": [0, 3, 14, 22, 35, 45, 47, 48, 56], "intend": [0, 1, 2], "develop": [0, 1, 29, 56], "should": [0, 14, 17, 23, 25, 27, 29, 30, 35, 37, 45, 46, 47, 48, 53, 54, 56], "pre": [0, 1, 2, 12, 14, 27, 34, 35, 39, 44, 46, 47, 56], "compress": 0, "decompress": 0, "becaus": [0, 23, 39, 40], "histor": 0, "came": 0, "involv": [0, 2, 45, 46], "unload": [0, 40, 42], "ton": 0, "bucket": [0, 22, 39, 46], "But": [0, 45], "user": [0, 2, 14, 19, 22, 25, 27, 31, 33, 34, 35, 37, 39, 40, 42, 47, 50, 51, 53, 54, 56, 59], "smaller": [0, 35, 45, 47], "choos": [0, 39], "good": [0, 51], "bet": 0, "enforc": [1, 2, 39, 53], "token": [1, 2, 14, 21, 26, 31, 33, 35, 37, 44, 46], "author": [1, 2, 21, 26, 33, 47], "enabl": [1, 2, 3, 22, 23, 27, 35, 36, 39, 40, 41, 45, 47, 49, 53, 56], "model": [1, 2, 4, 10, 11, 12, 13, 14, 15, 16, 18, 19, 21, 22, 26, 28, 31, 32, 33, 34, 36, 38, 41, 45, 47, 48, 52, 53, 54, 55, 57, 58], "api": [1, 10, 12, 22, 27, 34, 35, 43, 50, 51, 54, 56, 59], "disabl": [1, 22, 25, 33, 38, 39, 40, 41, 42, 46, 50], "ar": [1, 2, 15, 22, 23, 24, 25, 27, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 49, 50, 51, 53, 54, 56, 59], "address": [1, 2, 22, 31, 42, 46, 53], "concern": [1, 2, 42, 53], "unauthor": [1, 25, 53], "call": [1, 15, 17, 25, 27, 29, 31, 33, 34, 35, 37, 38, 39, 40, 42, 43, 46, 47, 48, 49, 50, 51, 53, 56, 57, 58], "prevent": [1, 2, 22, 42, 51, 53], 
"potenti": [1, 2, 34, 46, 53], "malici": [1, 2, 42], "from": [1, 2, 10, 12, 13, 14, 15, 22, 23, 24, 25, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 49, 50, 51, 53, 56, 57, 58], "being": [1, 2, 19, 23, 35, 40, 42, 46, 50, 53, 56], "introduc": 1, "server": [1, 10, 11, 21, 22, 23, 24, 25, 26, 27, 29, 33, 34, 38, 42, 47, 48, 50, 51, 53, 54, 55, 56, 57], "follow": [1, 2, 10, 14, 15, 22, 23, 25, 27, 28, 29, 30, 31, 33, 35, 37, 38, 39, 40, 42, 45, 47, 50, 51, 53, 54, 56, 57, 58, 59], "inform": [1, 2, 10, 14, 23, 25, 27, 29, 34, 35, 38, 39, 41, 46, 47], "perform": [1, 2, 17, 25, 26, 29, 32, 35, 36, 38, 40, 45, 46], "flexibl": [1, 12, 32, 35, 53], "easi": [1, 29, 32, 37, 50], "us": [1, 2, 10, 11, 12, 14, 15, 17, 19, 25, 26, 27, 30, 31, 32, 33, 34, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 58, 59], "eager": [1, 14, 19, 27, 43, 44], "mode": [1, 14, 22, 27, 35, 41, 44, 45, 47], "torchscript": [1, 14, 43, 44, 47, 56], "quick": [1, 32, 37, 47], "usag": [1, 11, 27, 34, 40, 47, 50, 56], "tutori": [1, 35], "archiv": [1, 22, 25, 28, 34, 35, 36, 39, 43, 44, 47, 50, 52, 54, 55, 56, 58, 59], "show": [1, 12, 23, 29, 39, 40, 43, 45, 46, 47, 50], "how": [1, 23, 25, 27, 29, 32, 34, 37, 38, 45, 46, 47, 50, 51], "packag": [1, 2, 4, 5, 23, 26, 27, 29, 34, 35, 44, 47, 54, 56, 59], "file": [1, 2, 10, 14, 19, 22, 23, 24, 27, 28, 29, 30, 31, 33, 38, 39, 40, 41, 42, 43, 47, 48, 51, 53, 54, 56, 58], "procedur": [1, 56], "explain": [1, 27, 32, 33, 35, 38], "rest": [1, 21, 25, 26, 33, 35, 50, 56, 57, 59], "specif": [1, 2, 14, 15, 31, 33, 34, 35, 39, 42, 45, 47, 49, 50, 56, 58], "endpoint": [1, 2, 10, 14, 29, 32, 33, 34, 37, 40, 41, 50], "grpc": [1, 2, 21, 26, 33, 39], "support": [1, 3, 11, 12, 14, 24, 25, 28, 29, 31, 33, 34, 35, 39, 40, 47, 48, 50, 51, 55, 57, 58, 59], "infer": [1, 10, 12, 13, 14, 21, 22, 25, 26, 27, 29, 31, 32, 34, 38, 39, 40, 44, 47, 49, 50, 53, 54, 56], "health": [1, 12, 21, 26, 31, 35], "deploi": [1, 3, 32, 34, 45, 46, 48], "scale": 
[1, 3, 21, 26, 27, 29, 32, 35, 42, 56, 58, 59], "log": [1, 3, 22, 25, 26, 34, 41, 46, 51, 53], "metric": [1, 4, 5, 10, 21, 22, 26, 32, 34, 38, 39, 49, 54], "prometheu": [1, 21, 26], "grafana": [1, 21, 26], "dashboard": [1, 41, 47], "captum": [1, 14, 15, 33, 44, 52], "explan": [1, 14, 15, 21, 26, 32, 48], "built": [1, 23, 25, 47, 50], "text": [1, 3, 14, 25, 27, 28, 36, 44], "imag": [1, 2, 10, 14, 22, 23, 25, 27, 28, 29, 33, 35, 37, 44, 47, 50, 56, 57], "batch": [1, 10, 11, 14, 15, 16, 25, 26, 27, 28, 32, 34, 35, 39, 40, 45, 46, 47, 59], "creat": [1, 3, 10, 11, 13, 23, 25, 29, 34, 35, 39, 41, 43, 45, 47, 53, 56], "workflow": [1, 21, 22, 25, 26, 29, 30, 31, 34, 35, 36, 44, 49, 50, 51, 52], "compos": [1, 14], "python": [1, 2, 10, 11, 15, 21, 22, 23, 26, 30, 35, 36, 37, 39, 48, 54, 55, 56, 59], "function": [1, 2, 3, 11, 12, 14, 15, 17, 19, 23, 27, 33, 34, 39, 47, 59], "sequenti": 1, "parallel": [1, 2, 35, 37, 45], "pipelin": [1, 35, 46], "classifi": [1, 27, 28, 40, 44, 56], "take": [1, 10, 14, 17, 25, 27, 29, 34, 35, 39, 45, 47, 50, 53], "an": [1, 2, 3, 11, 12, 14, 16, 19, 22, 23, 25, 29, 31, 33, 34, 35, 36, 39, 40, 43, 45, 46, 47, 50, 53, 54, 56], "return": [1, 10, 11, 12, 13, 14, 15, 16, 19, 29, 31, 33, 35, 39, 40, 41, 45, 50, 57, 58], "name": [1, 3, 10, 11, 14, 15, 19, 23, 25, 27, 28, 29, 30, 33, 34, 35, 37, 38, 39, 40, 41, 47, 48, 50, 54, 56, 58, 59], "input": [1, 2, 12, 14, 15, 17, 27, 28, 33, 35, 39, 44, 45, 46, 47, 48, 59], "classif": [1, 14, 27, 28, 44, 45], "base": [1, 4, 10, 11, 12, 14, 17, 18, 19, 25, 26, 27, 35, 41, 46, 47, 50, 56, 59], "vocabulari": [1, 14], "detector": [1, 27, 28, 56], "detect": [1, 2, 14, 22, 28, 44, 48, 50, 54, 56], "class": [1, 10, 11, 12, 14, 15, 16, 17, 18, 19, 28, 33, 35, 40, 48], "bound": [1, 14, 28], "respect": [1, 14, 28], "segment": [1, 14, 27, 28, 44, 56], "output": [1, 12, 14, 15, 25, 27, 28, 29, 33, 38, 39, 47, 59], "shape": [1, 14, 15, 28], "cl": [1, 28], "h": [1, 14, 28, 33, 39, 50, 53], "height": [1, 14, 28], "width": [1, 
14, 28], "llm": [1, 31, 33, 35], "easili": [1, 12, 37, 49], "languag": [1, 37, 48], "sentenc": 1, "can": [1, 2, 16, 19, 23, 25, 27, 28, 29, 31, 33, 34, 35, 36, 37, 39, 40, 41, 42, 45, 46, 47, 49, 50, 51, 53, 54, 56, 58, 59], "sequenc": [1, 31, 33, 35, 44, 45, 46], "q": 1, "answer": [1, 54], "multi": [1, 32, 34, 45, 47, 48], "modal": 1, "framework": [1, 12, 22, 23, 34, 35, 47, 48], "build": [1, 23, 34, 37, 47, 54, 56], "combin": [1, 35, 37], "audio": [1, 47], "video": [1, 47], "dual": 1, "translat": [1, 15, 48], "train": [1, 17, 28, 29, 44, 46, 50, 56], "readi": [1, 23, 27, 29, 31, 33, 35, 39, 44, 48, 56], "mani": [1, 34, 45, 47, 48, 50, 51], "intern": [1, 12, 27, 35], "integr": [1, 2, 35, 37, 47], "usecas": [1, 28], "resnet50": 1, "hpu": 1, "compil": [1, 47], "run": [1, 2, 3, 10, 12, 14, 17, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 46, 47, 51, 53, 54, 56], "devic": [1, 14, 22, 25, 27, 28, 35, 37, 45, 56], "describ": [1, 14, 21, 25, 26, 37, 46, 59], "test": [1, 4, 5, 16, 17, 23, 24, 26, 28, 29, 30, 34, 36, 37, 39, 48, 54], "regress": [1, 22, 30, 35, 36], "befor": [1, 2, 23, 25, 27, 28, 33, 39, 45, 46], "ship": 1, "them": [1, 11, 17, 34, 39, 45, 46, 50, 53, 56], "product": [1, 32, 34, 47, 50, 56], "custom": [1, 2, 10, 11, 14, 22, 23, 26, 34, 35, 37, 39, 41, 43, 47], "encrypt": [1, 21, 26], "s3": [1, 25, 39], "side": [1, 21, 26, 33, 39, 49], "km": [1, 39, 40], "serial": [1, 22, 27, 29, 43, 45, 47, 56], "aw": [1, 2, 25, 32, 39, 45], "dynamo": 1, "db": 1, "benchmark": [1, 29, 34], "profil": [1, 54], "jmeter": 1, "apach": 1, "bench": 1, "itself": [1, 45], "kubernet": [1, 33, 34], "demonstr": [1, 27, 40, 56], "deploy": [1, 32, 35, 45, 48, 56], "azur": 1, "googl": [1, 31, 32, 34, 35, 48], "mlflow": 1, "kubeflow": 1, "vertex": [1, 32], "ai": [1, 32, 34, 35, 48], "nvidia": [1, 26, 46, 47, 54, 55, 56], "mp": [1, 26, 47], "optim": [1, 23, 32, 35, 37], "worker": [1, 10, 11, 21, 22, 23, 26, 27, 29, 31, 33, 34, 35, 38, 40, 42, 45, 47, 51, 56, 59], 
"singl": [1, 11, 12, 35, 37, 45, 50], "11": [2, 22, 30, 42, 45], "white_check_mark": 2, "much": [2, 46, 50], "possibl": [2, 48, 56], "torchserv": [2, 10, 14, 15, 18, 19, 21, 22, 24, 26, 27, 33, 39, 41, 42, 44, 48, 52, 57, 58], "reli": 2, "autom": 2, "scan": 2, "In": [2, 3, 23, 25, 27, 29, 35, 38, 40, 45, 46, 47, 50, 53, 56], "particular": [2, 29], "depend": [2, 22, 25, 29, 30, 31, 34, 35, 37, 40, 45, 47, 54, 55], "analysi": 2, "dependabot": 2, "docker": [2, 22, 34, 37, 47, 56], "snyk": 2, "codeql": 2, "listen": [2, 10, 29, 31, 33, 34, 39, 41, 49, 57, 58], "port": [2, 23, 31, 33, 34, 39, 41, 49, 50, 54, 57, 58], "8080": [2, 22, 23, 25, 29, 33, 37, 39, 49, 53, 56, 57], "8081": [2, 22, 23, 25, 39, 42, 49, 53, 56, 58], "8082": [2, 22, 23, 25, 41], "7070": [2, 25, 31], "7071": [2, 25, 31], "localhost": [2, 3, 23, 25, 31, 33, 37, 39, 41, 42, 49, 53, 56, 57, 58], "default": [2, 3, 11, 14, 15, 21, 22, 25, 26, 31, 33, 34, 35, 38, 41, 45, 47, 49, 50, 51, 53, 56, 57, 58, 59], "doe": [2, 10, 14, 31, 35, 43, 47, 51], "ani": [2, 3, 19, 27, 29, 31, 33, 34, 35, 37, 38, 39, 40, 56], "includ": [2, 22, 23, 28, 33, 35, 39, 40, 47, 48, 56, 59], "wildcard": 2, "pleas": [2, 23, 28, 31, 33, 35, 37, 39, 42, 44, 45, 47, 56], "awar": [2, 46], "risk": [2, 25], "give": [2, 3, 33, 47], "host": [2, 23, 25, 29, 30, 34, 35, 38, 40, 41, 42, 49, 50, 56], "shown": [2, 27, 35, 40, 45, 46, 47], "abov": [2, 24, 25, 27, 29, 38, 40, 44, 45, 53, 54, 56, 59], "s": [2, 3, 10, 11, 12, 19, 22, 25, 26, 27, 28, 29, 32, 33, 35, 37, 39, 41, 45, 46, 47, 50, 51, 56, 58, 59], "expos": [2, 10, 25, 47], "when": [2, 10, 11, 14, 19, 23, 25, 27, 29, 30, 31, 32, 33, 35, 38, 39, 40, 41, 42, 45, 46, 47, 48, 49, 51, 53], "contain": [2, 14, 25, 27, 28, 35, 37, 39, 43, 48, 50, 56], "map": [2, 10, 11, 14, 19, 28, 35, 59], "ip": [2, 25, 42], "Be": [2, 38], "sure": [2, 16, 23, 27, 28, 29, 35, 37, 39, 45, 46, 48, 54, 56], "valid": [2, 25], "authent": [2, 25], "mar": [2, 22, 23, 25, 27, 29, 33, 34, 35, 39, 40, 42, 43, 44, 50, 56, 
58, 59], "download": [2, 3, 29, 35, 39, 41, 44, 54, 58], "internet": [2, 39, 58], "untrustworthi": 2, "sourc": [2, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 25, 29, 35, 36], "mai": [2, 3, 25, 27, 42, 53, 54], "have": [2, 22, 23, 25, 27, 29, 31, 33, 35, 37, 39, 40, 41, 45, 46, 47, 50, 53, 54, 56, 59], "compromis": 2, "applic": [2, 3, 25, 27, 31, 33, 37, 39, 42, 46, 47, 53, 56], "execut": [2, 16, 19, 22, 24, 25, 29, 30, 35, 41, 45, 47, 54, 56, 59], "arbitrari": [2, 25], "make": [2, 16, 23, 25, 27, 28, 29, 33, 34, 35, 37, 39, 40, 45, 46, 47, 48, 50, 54, 56, 57], "ve": [2, 29, 47], "audit": 2, "re": [2, 33, 47, 48, 50, 54, 59], "safe": 2, "trust": 2, "handler": [2, 3, 10, 14, 15, 16, 25, 26, 31, 33, 34, 35, 37, 39, 40, 43, 47, 48, 56], "clamd": 2, "org": [2, 15, 22, 23, 25, 29, 33, 34, 35, 38, 39, 42, 53, 54], "project": [2, 34, 35], "virustot": 2, "io": [2, 30, 46], "vt": 2, "fickl": 2, "trailofbit": 2, "insid": [2, 15, 45], "untrust": 2, "guarante": [2, 22, 34], "isol": [2, 45], "perspect": 2, "regist": [2, 17, 21, 23, 25, 26, 29, 31, 40, 42, 43, 47, 50, 56, 59], "url": [2, 3, 10, 22, 23, 25, 39, 42, 45, 50, 56, 58, 59], "set": [2, 3, 10, 15, 21, 22, 23, 25, 26, 31, 33, 35, 37, 40, 41, 45, 47, 48, 50, 51, 57, 58], "allowed_url": [2, 25], "paramet": [2, 10, 11, 12, 13, 14, 15, 19, 27, 35, 37, 38, 39, 40, 50, 51, 56, 58, 59], "config": [2, 22, 24, 33, 34, 40, 41, 42, 43, 45, 47, 48, 50, 51, 53, 54, 56], "properti": [2, 3, 10, 27, 34, 40, 41, 42, 47, 48, 50, 53, 54, 56], "restrict": [2, 37, 40, 56], "use_env_allowed_url": [2, 25], "read": [2, 10, 23, 25, 27, 53], "ssl": [2, 56], "two": [2, 3, 23, 25, 27, 38, 39, 43, 45, 49, 56], "wai": [2, 3, 25, 27, 35, 38, 39, 43, 50, 51], "keystor": [2, 25], "privat": [2, 25, 39], "kei": [2, 10, 11, 15, 25, 39, 48, 53, 56], "certif": [2, 25], "prepar": 2, "against": [2, 27, 38, 53], "bad": 2, "prompt": [2, 37, 54], "inject": 2, "some": [2, 3, 10, 27, 28, 29, 30, 35, 36, 37, 38, 39, 45, 46, 47, 54, 56], "g": [2, 23, 24, 25, 33, 35, 37, 
39, 46, 54, 56, 59], "fuzz": 2, "sanit": 2, "feed": [2, 17, 46, 59], "rigor": 2, "techniqu": [2, 46], "strict": 2, "rule": 2, "filter": [2, 25], "remov": [2, 3, 14, 19, 53, 54], "fragment": 2, "special": [2, 44], "represent": 2, "verif": 2, "identifi": [2, 27, 37], "attempt": [2, 38, 59], "multipl": [2, 14, 25, 32, 33, 35, 40, 43, 45, 47, 56], "share": [2, 45, 56], "memori": [2, 11, 23, 25, 35, 40, 45, 47], "respons": [2, 3, 13, 14, 15, 22, 25, 27, 31, 33, 34, 39, 40, 56, 57], "ensur": [2, 16, 22, 27, 40, 46, 54], "interact": [2, 29, 34], "each": [2, 23, 25, 28, 31, 33, 35, 45, 47, 51, 53, 59], "other": [2, 3, 27, 34, 35, 39, 45, 46, 56, 58, 59], "primari": 2, "area": 2, "tenant": 2, "resourc": [2, 23, 29, 31, 35, 39, 45, 46, 54, 58], "alloc": [2, 25, 29, 45], "attack": 2, "www": [2, 15, 25, 35], "facebook": 2, "whitehat": 2, "amazon": [2, 32, 36, 41], "section": [3, 23, 29, 35, 40, 45, 53, 59], "common": [3, 30, 31, 47], "face": [3, 23], "correspond": [3, 29, 40, 48, 56], "usual": [3, 35, 37, 54], "verifi": [3, 23, 35, 36], "ss": 3, "ntl": 3, "grep": [3, 22], "kill": 3, "differ": [3, 11, 12, 25, 27, 31, 34, 35, 39, 40, 45, 46, 47, 48, 50, 53, 56], "than": [3, 25, 33, 46], "md": [3, 24, 56, 59], "542": 3, "occur": 3, "17": [3, 22, 30, 54, 55], "older": 3, "max": [3, 22, 23, 35, 38, 47, 59], "size": [3, 10, 11, 14, 22, 23, 25, 27, 28, 34, 35, 37, 38, 39, 44, 45, 46, 47, 59], "roughli": 3, "6": [3, 28, 30, 35, 52], "5": [3, 14, 15, 28, 32, 35, 38, 39, 41, 52], "mb": [3, 11, 38, 40, 44], "henc": [3, 29], "greater": 3, "5mb": 3, "cannot": [3, 42], "upload": [3, 30, 42], "updat": [3, 11, 23, 28, 29, 32, 35, 39, 40, 46, 53, 56, 59], "max_request_s": [3, 25], "max_response_s": [3, 25], "cat": [3, 23, 33], "model_stor": [3, 23, 25, 29, 39, 40, 42, 45, 50, 53, 56], "ts": [3, 4, 5, 23, 25, 26, 27, 28, 31, 33, 34, 35, 37, 38, 39, 40, 42, 48, 50, 51, 56], "path": [3, 10, 23, 24, 25, 27, 29, 38, 39, 40, 43, 47, 50, 54, 55, 56, 58], "335": 3, "nc": [3, 22, 29, 40, 42, 50, 53, 
56], "383": 3, "512": [3, 35, 46], "last": [3, 31, 33, 35, 51], "restor": [3, 51], "state": [3, 33, 34, 50, 51], "thrown": 3, "inconsist": 3, "compar": [3, 45, 47], "log_loc": [3, 38, 51], "system": [3, 11, 12, 22, 25, 27, 31, 34, 39, 40, 47, 48, 51, 54, 56, 58], "export": [3, 10, 25, 27, 29, 36, 37, 39, 47, 55], "desir": [3, 27], "extract": 3, "654": 3, "clear": 3, "messag": [3, 10, 12, 13, 19, 33, 38, 42, 50], "try": [3, 14, 39, 46, 47, 50, 53], "conflict": 3, "exist": [3, 11, 25, 28, 31, 33, 34, 35, 40], "500": [3, 19, 33, 40], "wa": [3, 23, 33, 34, 36, 38, 46], "whether": [3, 39], "spawn": [3, 47], "up": [3, 12, 23, 25, 27, 29, 31, 32, 34, 35, 39, 45, 46, 47, 50, 53, 56, 58], "increas": [3, 27, 39, 45, 46, 47, 56], "curl": [3, 21, 22, 23, 26, 29, 30, 37, 39, 41, 42, 53, 54, 56, 58], "x": [3, 23, 25, 30, 33, 37, 39, 42, 56, 58], "model_nam": [3, 10, 11, 12, 15, 16, 18, 27, 33, 35, 37, 39, 40, 41, 50, 56, 59], "like": [3, 11, 14, 15, 25, 29, 32, 33, 34, 35, 37, 39, 40, 46, 47, 50], "egg": [3, 52], "json": [3, 4, 10, 11, 14, 19, 24, 25, 26, 29, 33, 34, 35, 37, 39, 45, 47, 48, 53, 56, 57, 58, 59], "etc": [3, 27, 34, 46, 56], "write": [3, 25, 28, 48, 53], "566": 3, "waveglow": [3, 27], "speech": [3, 27, 36], "synthes": [3, 27], "creation": [3, 39], "mostli": [3, 47], "initi": [3, 12, 14, 18, 22, 25, 27, 35, 39, 40, 42, 43, 47, 56], "due": [3, 45], "erron": 3, "observ": 3, "miss": [3, 14, 27], "modul": [3, 4, 5, 26, 35, 54], "667": 3, "537": 3, "subpackag": [4, 5, 26], "submodul": [4, 5, 26, 31], "dimens": [4, 5, 10, 26, 47], "metric_collector": [4, 5, 10, 25, 26], "metric_encod": [4, 5, 10, 26], "metrics_stor": [4, 5, 10, 26], "process_memory_metr": [4, 5, 10, 26], "system_metr": [4, 5, 10, 26], "unit": [4, 5, 10, 16, 26, 28, 39, 40, 41], "model_servic": [4, 5, 10, 26], "protocol": [4, 5, 10, 15, 25, 26, 31, 33, 35, 39, 58], "otf_message_handl": [4, 5, 10, 26, 31, 33, 35], "torch_handl": [4, 5, 10, 26, 27, 28, 35], "request_envelop": [4, 10, 14, 26], "bodi": [4, 10, 
14, 26, 27, 48], "kserv": [4, 10, 14, 21, 26, 39, 48], "kservev2": [4, 10, 14, 26], "unit_test": [4, 10, 14, 24, 26], "base_model": [4, 14, 16, 26], "test_util": [4, 14, 16, 26, 33, 35], "mock_context": [4, 14, 16, 26], "test_base_handl": [4, 10, 14, 26], "test_envelop": [4, 10, 14, 26], "test_image_classifi": [4, 10, 14, 26], "test_image_segment": [4, 10, 14, 26], "test_mnist_kf": [4, 10, 14, 26], "test_object_detector": [4, 10, 14, 26], "base_handl": [4, 5, 10, 26, 27, 47], "contract": [4, 5, 10, 26], "densenet_handl": [4, 5, 10, 26], "image_classifi": [4, 5, 10, 22, 23, 26, 27, 29, 31, 33, 53], "image_segment": [4, 5, 10, 26], "object_detector": [4, 5, 10, 26], "text_classifi": [4, 5, 10, 23, 26], "text_handl": [4, 5, 10, 26], "vision_handl": [4, 5, 10, 26], "timeit_decor": [4, 5, 10, 26], "arg_pars": [4, 5, 26], "model_load": [4, 5, 26], "model_serv": [4, 5, 26], "model_service_work": [4, 5, 26], "run_circleci_test": [4, 5, 26], "setup": [4, 5, 22, 25, 26, 27, 30, 34, 35, 39, 41, 56], "regression_test": [4, 5, 26], "torchserve_san": [4, 5, 24, 26], "ts_script": [4, 5, 24, 26, 29, 30, 31, 34, 36, 54, 55], "api_util": [4, 5, 26], "backend_util": [4, 5, 26], "frontend_util": [4, 5, 26], "install_depend": [4, 5, 24, 26, 29, 30, 36, 54], "install_from_src": [4, 5, 26, 54, 55], "marsgen": [4, 5, 26], "modelarchiver_util": [4, 5, 26], "print_env_info": [4, 5, 26], "regression_util": [4, 5, 26], "sanity_util": [4, 5, 26], "shell_util": [4, 5, 26], "torchserve_grpc_cli": [4, 5, 26, 29, 31], "tsutil": [4, 5, 26], "validate_model_on_gpu": [4, 5, 26], "workflow_archiver_util": [4, 5, 26], "pars": [10, 14, 19, 58], "argpars": 10, "parser": [10, 34], "todo": 10, "add": [10, 11, 28, 31, 34, 35, 37, 38, 39, 42, 48, 53, 54, 56], "static": [10, 25, 46], "extract_arg": 10, "arg": [10, 14, 17, 27, 34, 35, 39], "none": [10, 11, 12, 13, 14, 18, 19, 25, 27, 33, 35, 39, 40], "model_service_worker_arg": 10, "backend": [10, 19, 23, 27, 29, 31, 32, 33, 35, 38, 39, 41], "socket": [10, 13, 
34, 47], "ts_parser": 10, "incom": [10, 23, 27, 34], "request": [10, 12, 14, 15, 22, 23, 25, 26, 29, 30, 31, 33, 34, 35, 37, 38, 39, 40, 41, 42, 44, 45, 50, 56, 59], "manifest": [10, 12, 27, 34, 39, 47, 50, 58], "batch_siz": [10, 23, 39, 47, 59], "mms_version": 10, "limit_max_image_pixel": [10, 25], "model_yaml_config": [10, 25, 35], "fix": [10, 46, 48], "time": [10, 11, 19, 23, 25, 27, 29, 33, 34, 35, 38, 39, 50, 51, 53, 59], "get_all_request_head": 10, "idx": [10, 11, 18, 40], "int": [10, 11, 14, 40, 59], "dict": [10, 12, 14, 27, 34, 39, 40, 59], "str": [10, 11, 14, 19, 27, 33, 35, 40], "get_request_head": [10, 18, 39], "get_request_id": [10, 40], "get_response_content_typ": 10, "get_response_head": 10, "get_response_statu": 10, "tupl": 10, "get_sequence_id": 10, "request_processor": 10, "set_all_response_statu": 10, "200": [10, 31, 33, 35, 38, 39, 40, 42], "phrase": 10, "statu": [10, 23, 31, 33, 35, 39, 40, 42, 58], "individu": 10, "param": [10, 11, 14, 19, 27, 29, 56, 59], "set_response_content_typ": 10, "set_response_head": 10, "set_response_statu": 10, "index": [10, 11, 28, 40, 56], "sent": [10, 12, 25, 27, 31, 33, 35], "handl": [10, 12, 14, 15, 23, 25, 28, 31, 33, 34, 35, 39, 47, 50], "requestprocessor": 10, "request_head": 10, "processor": [10, 25], "add_response_properti": 10, "get_request_properti": 10, "get_response_status_cod": 10, "get_response_status_phras": 10, "report_statu": 10, "reason_phras": 10, "loader": [10, 34, 47], "modelload": 10, "abstract": [10, 12, 15, 27], "gpu_id": [10, 18, 27], "envelop": [10, 15, 26, 34], "bool": [10, 17, 19], "modelloaderfactori": 10, "get_model_load": 10, "tsmodelload": 10, "1": [10, 15, 22, 23, 25, 27, 28, 29, 33, 37, 38, 39, 40, 41, 42, 44, 45, 47, 50, 53, 56, 58, 59], "metrics_cach": 10, "metricscacheyamlimpl": 10, "load_properti": 10, "file_path": 10, "modelservicework": 10, "mm": [10, 12], "front": [10, 11], "end": [10, 11, 40, 43, 46], "commun": [10, 44], "binari": [10, 22, 34], "torchmodelservicework": 10, 
"s_type": 10, "s_name": 10, "host_addr": 10, "port_num": 10, "metrics_config": [10, 40], "handle_connect": 10, "cl_socket": 10, "connect": [10, 34, 39, 45], "load_model": [10, 23, 25], "load_model_request": 10, "expect": [10, 15, 23, 25, 28, 33, 45, 47, 48, 53, 56], "modelpath": 10, "modelnam": [10, 23, 25, 39, 40, 41, 42, 58], "cpu": [10, 11, 22, 24, 27, 29, 30, 32, 40, 44, 46, 47], "els": [10, 14, 27, 37, 39, 40], "wrapper": [10, 12], "unwrapp": 10, "batchsiz": [10, 23, 25, 31, 35, 39, 58], "limitmaximagepixel": 10, "limit": [10, 22, 35, 39, 45, 47, 55, 56, 58], "pillow": [10, 46], "max_image_pixel": 10, "run_serv": 10, "process": [10, 11, 12, 14, 23, 25, 27, 29, 32, 34, 35, 39, 43, 45, 46, 47], "customservic": 10, "definit": [10, 14, 19, 40], "entry_point": 10, "predict": [10, 14, 15, 21, 22, 23, 25, 26, 28, 31, 35, 37, 38, 39, 40, 50, 53, 56, 59], "request_input": 10, "retrieve_data_for_infer": 10, "requestid": 10, "111": [10, 15], "222": 10, "3333": 10, "contenttyp": 10, "val1": 10, "set_cl_socket": 10, "emit_metr": 10, "emit": [10, 19, 40], "dictionari": [10, 11, 14, 15, 19, 27, 39], "metric_nam": [10, 40], "c": [10, 22, 25, 29, 32, 34, 38, 45, 49, 50, 54], "standard": [10, 27, 42, 48, 53], "ping": [10, 12, 23, 31, 33, 35], "descript": [10, 21, 26, 28, 35, 50, 59], "d": [10, 14, 37, 38, 39, 45, 48, 50], "wait": [10, 23, 35, 39, 45, 59], "to_dict": 11, "request_id": [11, 31, 33, 35, 40], "metric_method": 11, "gener": [11, 23, 24, 25, 27, 29, 31, 32, 33, 35, 39, 44, 45, 47, 49, 51, 53], "print": [11, 38, 39], "stdout": [11, 29, 38], "reset": [11, 39], "order": [11, 22, 25, 39, 40, 45, 46, 48, 53, 56], "float": [11, 40], "dump": [11, 38], "metricencod": 11, "skipkei": 11, "fals": [11, 13, 22, 23, 25, 27, 30, 33, 35, 39, 41, 42, 53], "ensure_ascii": 11, "check_circular": 11, "allow_nan": 11, "sort_kei": 11, "indent": 11, "separ": [11, 25, 28, 35, 45], "jsonencod": 11, "obj": 11, "collect": [11, 25, 32, 38, 39, 40, 41, 46, 47, 50], "metricsstor": 11, "deprec": 
[11, 19, 31, 33, 35], "And": [11, 29, 34, 39, 47], "keep": [11, 34, 39], "add_count": [11, 40], "counter": [11, 41, 46], "increment": [11, 40], "add_error": 11, "error": [11, 14, 25, 38], "add_metr": [11, 40], "add_perc": [11, 40], "percentag": 11, "add_siz": [11, 40], "kb": [11, 40, 44], "gb": [11, 40], "add_tim": [11, 39, 40], "ms": [11, 23, 38, 39, 40, 41, 59], "latenc": [11, 31, 33, 40, 45, 46, 47], "accept": [11, 25, 27, 35, 39, 40], "pass": [11, 17, 18, 23, 25, 27, 35, 39, 43, 46, 47, 48, 51, 58, 59], "pid": [11, 23, 34, 39], "gpuid": [11, 23], "check_process_mem_usag": 11, "stdin": 11, "mem_util": 11, "get_cpu_usag": 11, "psutil": [11, 34, 55], "collect_al": 11, "mod": 11, "num_of_gpu": 11, "cpu_util": 11, "disk_avail": 11, "disk_us": 11, "disk_util": 11, "gpu_util": 11, "memory_avail": 11, "memory_us": 11, "memory_util": 11, "element": 11, "modelservic": 12, "wrap": [12, 50], "preprocess": [12, 14, 27, 35, 38, 39, 48, 58, 59], "manner": 12, "backward": [12, 34, 40, 47], "raw": [12, 14, 15, 23, 27, 29, 33, 55, 57], "back": [12, 27, 35], "client": [12, 21, 22, 26, 33, 34, 35, 39, 45, 49], "healthi": [12, 23, 33], "singlenodeservic": 12, "singlenodemodel": 12, "otf": 13, "create_load_model_respons": 13, "create_predict_respons": 13, "ret": 13, "req_id_map": 13, "ts_stream_next": 13, "encode_response_head": 13, "resp_hdr_map": 13, "retrieve_msg": 13, "conn": 13, "retriev": 13, "channel": [13, 25], "send_intermediate_predict_respons": [13, 31, 33, 35], "state_dict": 14, "basehandl": [14, 16, 17, 19, 28, 39, 43, 47], "abc": [14, 15, 35], "describe_handl": [14, 39], "explain_handl": [14, 27, 39], "data_preprocess": [14, 27, 39], "raw_data": [14, 27], "tensor": [14, 27, 35, 37, 46, 59], "unprocess": [14, 27], "target": [14, 23, 27, 41, 46, 56], "get_devic": 14, "self": [14, 25, 27, 30, 35, 39, 40, 43, 47], "outcom": [14, 39, 56], "pertain": [14, 38, 39], "kwarg": [14, 17, 27], "pt": [14, 17, 18, 27, 43, 56], "first": [14, 23, 25, 27, 29, 30, 32, 35, 37, 40, 45, 46, 
51], "rais": [14, 27], "runtimeerror": [14, 27], "setup_ort_sess": 14, "model_pt_path": [14, 27, 47], "map_loc": 14, "densenethandl": 14, "match": [14, 27], "list_classes_from_modul": [14, 19], "parent_class": [14, 19], "imageclassifi": [14, 16, 27], "visionhandl": 14, "get_max_result_class": 14, "image_process": 14, "resiz": 14, "256": 14, "interpol": 14, "bilinear": 14, "max_siz": 14, "antialia": 14, "centercrop": 14, "224": 14, "totensor": 14, "normal": 14, "mean": [14, 25, 34, 35, 47], "485": 14, "456": 14, "406": 14, "std": 14, "229": 14, "225": 14, "set_max_result_class": 14, "topk": 14, "imagesegment": [14, 16], "n": [14, 22, 24, 25, 28, 31, 33, 35, 38, 39, 54], "k": [14, 23, 56], "objectdetector": [14, 16], "threshold": 14, "NOT": [14, 22, 35, 53], "textclassifi": 14, "texthandl": 14, "get_insight": [14, 27], "text_preprocess": 14, "calcul": [14, 27, 35], "insight": [14, 27, 47], "word": 14, "import": [14, 27, 31, 33, 35, 39, 40, 47], "form": [14, 15, 25, 40, 42, 47], "whose": [14, 27, 44], "ngram": 14, "2": [14, 15, 22, 23, 25, 27, 28, 30, 32, 33, 35, 37, 39, 42, 46, 47, 53, 56, 58, 59], "come": [14, 25, 34, 46, 47, 50], "output_explain": [14, 27], "hit": 14, "basic": [14, 16, 27, 35], "cleanup": 14, "oper": [14, 23, 40, 45, 46, 47], "html": [14, 24], "lowercas": 14, "expand": 14, "i": [14, 24, 25, 31, 32, 33, 35, 40, 43, 46, 54, 56], "don": [14, 34], "do": [14, 22, 24, 25, 27, 28, 35, 37, 38, 43, 47, 53, 56], "accent": 14, "punctuat": 14, "source_vocab": 14, "after": [14, 29, 30, 37, 38, 39, 40, 42, 46, 47, 50, 51, 54], "perfom": 14, "get_source_vocab_path": 14, "ctx": [14, 35], "get_word_token": 14, "input_token": 14, "construct": 14, "necessari": [14, 29, 35], "summarize_attribut": 14, "attribut": [14, 27, 40, 47], "summaris": 14, "vision": [14, 25, 46, 56], "tensor_data": 14, "requestenvelop": 15, "reformat": 15, "orchestr": [15, 48], "seldon": [15, 34, 48], "flat": [15, 48], "item": [15, 39, 46, 54, 58], "vice": [15, 35], "versa": [15, 35], 
"baseenvelop": 15, "handle_fn": [15, 16], "interfac": [15, 25, 54], "format_output": 15, "parse_input": 15, "grab": 15, "bodyenvelop": 15, "structur": [15, 48, 59], "outlin": 15, "tensorflow": 15, "tfx": 15, "api_rest": 15, "jsonenvelop": 15, "implement": [15, 27, 35, 39, 46, 47, 59], "captur": [15, 38, 59], "kserveenvelop": 15, "readabl": 15, "kservev2envelop": 15, "fserv": 15, "id": [15, 19, 23, 25, 27, 39, 40, 56], "f0222600": 15, "353f": 15, "47df": 15, "8d9d": 15, "c96d96fa894": 15, "bert": [15, 27, 44, 45, 46], "model_vers": [15, 39, 40, 41], "datatyp": 15, "int64": 15, "37": 15, "66": 15, "108": 15, "109": 15, "base_model_context": 16, "test_batch_handl": 16, "test_inference_with_profiler_works_with_custom_initialize_method": 16, "test_single_handl": 16, "test_binari": 16, "test_bodi": 16, "test_json": 16, "test_json_batch": 16, "test_json_double_batch": 16, "complex": 16, "mux": 16, "sever": [16, 34], "demux": 16, "result": [16, 19, 27, 31, 33, 35, 42, 45, 47, 50, 53, 57], "image_byt": 16, "tmp_path_factori": 16, "test_handl": 16, "test_handle_explain": 16, "simpl": [17, 29, 46], "forward": [17, 27, 46], "argmaxmodel": 17, "comput": [17, 29, 45], "everi": [17, 30, 40, 59], "subclass": 17, "although": 17, "recip": 17, "within": [17, 33, 39, 50], "afterward": 17, "sinc": [17, 40, 46], "former": 17, "care": [17, 34], "hook": 17, "latter": 17, "silent": 17, "ignor": [17, 51], "save_pt_fil": 17, "filepath": 17, "mock": 18, "ad": [18, 22, 32, 34, 35, 37, 40, 45, 47, 53], "without": [18, 35, 39, 45, 50, 51], "mockcontext": 18, "model_pt_fil": 18, "tmp": [18, 56], "model_fil": 18, "mnist": [18, 22, 27, 33, 44, 50, 56], "model_yaml_config_fil": 18, "replic": 18, "exp": 18, "timeit": 19, "decor": [19, 46], "func": 19, "pt2backend": 19, "enum": [19, 34, 40], "enumer": 19, "aot_cudagraph": 19, "aot_eag": 19, "aot_nvfus": 19, "fx2trt": 19, "inductor": 19, "ipex": [19, 37, 47], "nvfuser": 19, "ofi": 19, "onnxrt": 19, "openvino": 19, "torchxla_trace_onc": 19, "except": 
[19, 23, 45], "predictionexcept": [19, 27], "error_cod": 19, "check_valid_pt2_backend": 19, "klass": 19, "pendingdeprecationwarn": 19, "mark": 19, "warn": [19, 38, 40, 53], "categori": 19, "get_yaml_config": 19, "yaml_file_path": 19, "load_label_map": 19, "mapping_file_path": 19, "friendli": [19, 28], "map_class_to_label": 19, "prob": 19, "lbl_class": 19, "probabl": [19, 28, 33], "stream": [21, 26, 33], "unregist": [21, 25, 26, 31, 59], "ci": [22, 30, 34, 36], "job": [22, 25, 30, 32, 34, 40], "auto": [22, 25, 35], "devicetyp": [22, 35], "yaml": [22, 25, 33, 40, 43, 47, 58, 59], "report": [22, 24, 29, 47], "pytest": [22, 52], "maco": [22, 30, 39], "been": [22, 36, 39, 46, 47, 50, 53, 54, 56, 58], "densenet161": [22, 29, 31, 44, 53], "alexnet": [22, 44], "model_store_gen": 22, "10": [22, 23, 25, 38, 39, 40, 54, 55], "16": [22, 25, 38, 45], "heap": 22, "8192": 22, "m": [22, 24, 29, 31, 32, 37, 38], "librari": [22, 35, 47], "bin": [22, 54, 55], "python3": [22, 27], "127": [22, 23, 25, 29, 33, 38, 41, 42, 53, 56], "dir": [22, 27, 35, 56], "thread": [22, 25, 34, 35, 40, 47], "blacklist": 22, "regex": [22, 25], "maximum": [22, 23, 25, 33, 39, 45, 58, 59], "6553500": [22, 25], "pixel": [22, 25], "prefer": 22, "direct": 22, "buffer": 22, "cpp": 22, "024": 22, "04": [22, 30], "08t14": 22, "02": [22, 30, 39, 53], "380": 22, "info": [22, 27, 38, 39, 42], "servingsdk": 22, "impl": 22, "pluginsmanag": 22, "2024": [22, 42, 53], "391": 22, "modelserv": [22, 33, 39], "699": 22, "debug": [22, 38], "wlm": [22, 34, 38], "modelversionedref": 22, "modelmanag": [22, 34], "updatemodel": 22, "count": [22, 40, 41, 42], "kitten": [22, 23, 29, 31, 44, 53, 56], "jpg": [22, 23, 29, 31, 33, 44, 53, 56, 57], "tabbi": [22, 23, 29, 33], "40966302156448364": 22, "tiger_cat": [22, 23, 29], "3467046618461609": 22, "egyptian_cat": [22, 23, 29], "1300288736820221": 22, "lynx": [22, 23, 29], "02391958422958851": 22, "011532187461853027": 22, "myenv": 22, "pip": [22, 24, 29, 31, 35, 36, 54, 55], 
"torchaudio": 22, "torchdata": 22, "7": [22, 30, 35, 40, 44], "torchtext": [22, 52, 55], "torchvis": [22, 28, 46, 52, 55], "myenv3": 22, "nightli": [22, 30, 34, 47], "0b20240312": 22, "12b20240312": 22, "03": [22, 39, 42], "12t15": 22, "58": 22, "54": 22, "702": 22, "46661922335624695": 22, "46449029445648193": 22, "0661405548453331": 22, "001292439759708941": 22, "plastic_bag": [22, 29], "00022909720428287983": 22, "aggreg": [23, 34, 39, 40, 45], "send": [23, 25, 27, 29, 31, 33, 35], "ml": [23, 45], "dl": 23, "onc": [23, 31, 33, 35, 40, 41, 42, 47, 56], "design": [23, 25, 34], "nativ": [23, 25, 47], "most": [23, 25, 27, 33, 34, 38, 46, 47, 53], "turn": 23, "reduc": [23, 46, 47], "expens": [23, 46], "jump": 23, "what": [23, 32, 35, 47, 50], "max_batch_delai": [23, 39], "know": [23, 29, 46, 50], "fill": [23, 45], "full": [23, 31, 33, 35, 39, 40], "see": [23, 25, 27, 28, 29, 33, 35, 38, 39, 40, 41, 45, 47, 49, 50, 57, 58], "hug": 23, "4": [23, 25, 35, 39, 41, 44, 52, 53, 58, 59], "interest": [23, 47, 50], "delai": [23, 25, 38, 39, 59], "receiv": [23, 25, 31, 33, 35, 38, 40, 45, 59], "doesn": [23, 25, 39], "timer": 23, "ever": 23, "were": [23, 44, 50], "let": [23, 25, 50], "50": [23, 37, 59], "millisecond": [23, 25, 39, 40, 41, 59], "milli": 23, "second": [23, 25, 30, 39, 45, 47, 56], "defaultvers": [23, 25], "marnam": [23, 25], "minwork": [23, 25, 33, 35, 39, 58], "maxwork": [23, 25, 35, 39, 58], "maxbatchdelai": [23, 25, 35, 39, 58], "responsetimeout": [23, 25, 35], "120": [23, 25, 35, 39], "associ": [23, 40], "relat": [23, 29, 35, 56], "frontend": [23, 25, 29, 31, 33, 35, 54], "tri": [23, 35, 43], "bring": [23, 32], "inferenc": [23, 34], "thing": [23, 47, 50], "inference_address": [23, 25], "management_address": [23, 25], "go": [23, 32, 37, 46, 48], "10m": 23, "mar_fil": [23, 33, 35, 39, 42], "batch_v2": 23, "initial_work": [23, 39, 56], "properli": [23, 39, 53], "modelvers": [23, 39], "modelurl": [23, 39, 42], "loadedatstartup": [23, 39], "9000": [23, 38, 39, 41], 
"starttim": [23, 39], "2021": 23, "06": 23, "14t23": 23, "18": [23, 33, 38, 42, 45, 50], "21": [23, 41], "793z": 23, "memoryusag": [23, 39], "1726554112": 23, "19946": 23, "gpuusag": [23, 39], "mib": 23, "678": 23, "ljo": 23, "5798614621162415": 23, "38344162702560425": 23, "0342114195227623": 23, "0005819813231937587": 23, "quilt": 23, "000273319921689108": 23, "about": [23, 25, 27, 29, 46, 47, 50], "5000": [23, 58, 59], "Then": [23, 29, 35, 38], "14t22": 23, "44": [23, 39], "36": 23, "742z": 23, "19116": 23, "similar": [23, 35, 39], "previou": [23, 38, 39, 45, 58], "entrypoint": 23, "referenc": [23, 40], "metrics_address": [23, 25], "number_of_netty_thread": [23, 25], "32": [23, 42, 45], "job_queue_s": [23, 25], "1000": [23, 39], "home": [23, 24, 25, 29, 45, 54, 55, 56], "100": [23, 25, 35, 38, 39, 58], "cv": 23, "cu102": [23, 24, 29], "rm": [23, 37, 56], "p": [23, 24, 37, 56], "v": [23, 37, 39, 50, 56], "ubuntu": [23, 30, 39, 42, 45], "alreadi": [24, 29, 40, 46, 50, 53, 59], "dev": [24, 30, 34, 54], "cu121": [24, 29], "cu118": [24, 29], "cu117": [24, 29, 30], "cu116": [24, 29, 30], "cu113": [24, 29], "cu111": [24, 29], "cu101": [24, 29], "cu92": [24, 29], "gradlew": [24, 34], "clean": [24, 25, 47], "checkstyl": 24, "findbug": 24, "pmd": 24, "ut": 24, "cov": 24, "htmlcov": 24, "pylint": 24, "rn": 24, "rcfile": 24, "pylintrc": 24, "cd": [24, 31, 54, 55], "htmlcov_ut": 24, "model_archiv": 24, "htmlcov_it": 24, "integ_test": 24, "excut": 24, "npm": [24, 54], "linux": [24, 26, 30], "sudo": [24, 41, 45, 55], "apt": [24, 55], "y": 24, "nodej": [24, 54], "mac": 24, "brew": 24, "broken": 24, "directori": [24, 25, 27, 29, 50, 51, 53, 56], "recurs": [24, 31], "link_check_config": 24, "done": [24, 27, 47], "suffici": 25, "want": [25, 27, 29, 37, 38, 39, 43, 45, 50, 53, 56], "topic": [25, 32, 50], "avail": [25, 28, 29, 33, 35, 37, 39, 40, 47, 50, 51, 54, 56], "three": [25, 35, 40, 53], "prioriti": [25, 42, 53], "thei": [25, 35, 40, 50], "chang": [25, 27, 29, 33, 37, 39, 41, 
51, 53, 54, 57, 58, 59], "behavior": [25, 27, 34, 35, 40, 42, 50, 53], "java": [25, 30, 49, 54], "pythonpath": [25, 39], "higher": [25, 45, 47], "ts_config_fil": 25, "log4j2": [25, 38, 40, 50], "xml": [25, 38, 40, 50], "foreground": 25, "footprint": [25, 45], "vmarg": [25, 38], "adjust": [25, 31, 39], "fit": [25, 34, 35], "present": [25, 27, 39, 40, 50, 54, 58], "model1": [25, 59], "model2": [25, 59], "disk": [25, 40, 46], "pathnam": 25, "avoid": [25, 29, 47], "bind": 25, "8443": [25, 56], "network": [25, 27, 46], "172": [25, 42], "grpc_inference_address": 25, "grpc_management_address": 25, "grpc_inference_port": 25, "grpc_management_port": 25, "grpc_inference_max_connection_age_m": 25, "infinit": [25, 39], "grpc_management_max_connection_age_m": 25, "grace": 25, "grpc_inference_max_connection_age_grace_m": 25, "grpc_management_max_connection_age_grace_m": 25, "443": 25, "whatev": 25, "traffic": 25, "must": [25, 27, 29, 33, 39, 42, 47, 53, 58], "password": 25, "pkcs12": 25, "pkcs8": 25, "openssl": 25, "x509": 25, "chain": 25, "keytool": 25, "storepass": 25, "own": [25, 27, 39, 45, 47], "genkei": 25, "keyalg": 25, "rsa": 25, "alia": [25, 54], "p12": 25, "changeit": 25, "storetyp": 25, "3600": 25, "keysiz": 25, "2048": 25, "dname": 25, "cn": 25, "my_t": 25, "ou": 25, "o": [25, 29, 33, 46, 55, 57], "l": 25, "palo": 25, "alto": 25, "st": 25, "california": 25, "8444": [25, 56], "8445": [25, 56], "keystore_pass": 25, "keystore_typ": 25, "sign": 25, "cert": 25, "req": 25, "dai": [25, 30], "365": [25, 41], "newkei": 25, "keyout": 25, "mykei": 25, "mycert": 25, "pem": 25, "private_key_fil": 25, "certificate_fil": 25, "addit": [25, 35, 39, 45, 46, 47, 48, 50], "header": [25, 33, 35, 37], "tell": [25, 33, 38, 47, 50], "browser": [25, 41], "web": [25, 49, 50], "domain": 25, "permiss": 25, "select": [25, 27, 35, 37, 45], "cors_allowed_origin": 25, "yourdomain": 25, "preflight": 25, "cors_allowed_method": 25, "put": [25, 27, 34, 35, 39, 56], "cors_allowed_head": 25, "xx": 25, 
"maxdirectmemorys": 25, "affect": [25, 46], "prefer_direct_buff": 25, "part": [25, 27, 32, 34, 39, 58], "seamless": [25, 27], "install_py_dep_per_model": [25, 56], "tar": 25, "gz": [25, 38], "might": [25, 29, 38, 39, 46, 50, 53], "sensit": 25, "credenti": [25, 39], "secur": [25, 26, 32, 42, 53], "blacklist_env_var": 25, "regular": [25, 47], "express": 25, "number_of_gpu": [25, 47, 56], "pci": 25, "bu": 25, "enable_metrics_api": [25, 41], "parametername1": 25, "parametervalue1": 25, "parametername2": 25, "parametervalue2": 25, "parameternamen": 25, "parametervaluen": 25, "minimum": [25, 39, 59], "msec": 25, "timeout": [25, 34, 35, 39, 59], "sec": 25, "over": [25, 31, 33, 35, 45, 46, 56, 59], "default_response_timeout": 25, "noop": [25, 39], "vgg16": [25, 44, 50], "embed": [25, 27], "distinct": 25, "determin": [25, 34, 35, 38, 45, 47, 56], "final": [25, 45], "lowest": 25, "highest": [25, 33, 46], "fulli": [25, 45, 47], "pippi": 25, "rpc": [25, 31, 35], "deviceid": [25, 35, 40], "round": [25, 27, 35, 39], "robin": [25, 27, 35], "strategi": [25, 30], "assign": [25, 34, 35, 39], "otherwis": [25, 28, 35], "tune": [25, 32, 47], "impact": [25, 40, 46, 47, 53], "scalabl": 25, "throughput": [25, 29, 35, 38, 45, 46, 47], "enable_envvars_config": [25, 42, 53], "child": 25, "eventloopgroup": 25, "group": 25, "eventloop": [25, 34], "event": 25, "logic": [25, 35, 47, 50], "netty_client_thread": 25, "workerthread": [25, 34], "default_workers_per_model": 25, "queue": [25, 35, 40, 45], "async_log": [25, 38], "asynchron": [25, 39], "deem": [25, 39], "unrespons": [25, 39], "reboot": [25, 39], "unregister_model_timeout": 25, "decode_input_request": 25, "decod": [25, 33, 35, 39, 46], "known": [25, 27, 40, 46, 51, 55], "bytearrai": 25, "convers": [25, 47], "initial_worker_port": 25, "model_server_hom": 25, "pil": 25, "larg": [25, 26, 32, 36, 37, 43, 46, 47], "payload": 25, "comma": 25, "amazonaw": 25, "workflow_stor": [25, 50, 58], "disable_system_metr": 25, "system_metrics_cmd": 25, 
"empti": [25, 31, 35], "collector": 25, "ts_": 25, "property_nam": 25, "ts_inference_address": 25, "troubleshoot": [26, 32], "coverag": 26, "advanc": [26, 35], "window": [26, 33, 51], "subsystem": 26, "wsl": 26, "polici": [26, 32, 38], "faq": [26, 32], "invok": [27, 50, 56], "Is": [27, 43], "ll": [27, 29, 47], "act": 27, "def": [27, 31, 33, 35, 39, 40, 59], "entry_point_function_nam": 27, "sampl": [27, 29, 37, 44], "jit": [27, 34, 47], "similarli": 27, "global": [27, 41, 53, 59], "is_avail": 27, "serialized_fil": 27, "serializedfil": 27, "os": [27, 30, 39], "join": [27, 33, 35], "isfil": 27, "engag": 27, "ask": [27, 32], "startup": [27, 34, 42, 51], "down": [27, 51, 53], "typic": [27, 40], "modelhandl": [27, 35], "__init__": [27, 35], "_context": 27, "prediciton": 27, "pred_out": 27, "unexpect": 27, "513": 27, "nonetheless": 27, "below": [27, 28, 29, 35, 41, 43, 45], "init": [27, 35], "pattern": [27, 38], "maintain": [27, 39], "model_handl": 27, "preprocessed_data": 27, "model_input": 27, "ndarrai": 27, "model_output": 27, "inference_output": 27, "postprocess_output": 27, "achiev": [27, 33, 35, 46], "place": [27, 56], "written": [27, 48], "hi": 27, "algorithm": [27, 34, 35], "lig": 27, "layerintegratedgradi": 27, "captum_sequence_forward": 27, "_is_explain": [27, 39], "so": [27, 29, 35, 37, 40, 54], "neccessari": 27, "logger": [27, 39], "row": 27, "isinst": 27, "statement": [27, 53], "default_handler_nam": 27, "defaulthandlerclass": 27, "customimageclassifi": 27, "procsess": 27, "goe": 27, "digit": [27, 44, 50], "model_version_numb": 27, "path_to_model_architecture_fil": 27, "path_to_state_dict_fil": 27, "comma_seperarted_additional_fil": 27, "skip": 27, "waveglow_synthes": 27, "waveglow_model": 27, "nvidia_waveglowpyt_fp32_20190306": 27, "pth": [27, 29, 56], "waveglow_handl": 27, "tacotron": 27, "nvidia_tacotron2pyt_fp32_20190306": 27, "vcpu": [27, 29], "fashion": [27, 45], "consum": 28, "imagenet": [28, 44], "dataset": [28, 44], "rgb": 28, "top": [28, 48], "ag": 
28, "comprehens": 28, "page": [28, 35, 39, 44, 47, 50, 58], "automat": [28, 29, 31, 35, 37, 48, 56], "numer": 28, "simpli": [28, 35, 48], "welcom": 28, "isn": 28, "cover": [28, 38, 47, 50], "model_packag": 28, "alwai": [28, 46, 48], "saniti": 28, "submit": [28, 34, 44], "conda": [29, 34], "12": [29, 53], "9": [29, 44], "complet": [29, 31, 33, 35, 38, 39, 46, 56], "clone": [29, 31, 54, 55], "repositori": 29, "git": [29, 31, 52, 54, 55], "parent": 29, "root": 29, "my_path": 29, "mkdir": [29, 31, 56], "wget": [29, 55], "8d451a50": 29, "repo": [29, 31], "densenet_161": 29, "index_to_nam": [29, 56], "equal": [29, 33, 35, 40], "power": [29, 32], "lot": [29, 50], "autosc": 29, "consider": 29, "minim": [29, 35, 41], "move": [29, 35, 56], "later": 29, "finer": 29, "grain": 29, "u": [29, 31], "grpcio": [29, 31], "protobuf": [29, 31, 35], "proto": [29, 31], "grpc_tool": [29, 31], "protoc": [29, 31], "proto_path": [29, 31], "src": [29, 31, 34, 38, 54], "python_out": [29, 31], "grpc_python_out": [29, 31], "cute": 29, "githubusercont": [29, 33, 55, 57], "kitten_smal": [29, 33, 44, 57], "46933549642562866": 29, "4633878469467163": 29, "06456148624420166": 29, "0012828214094042778": 29, "00023323034110944718": 29, "seen": 29, "deep": [29, 35, 47, 50], "learn": [29, 32, 35, 47, 50], "registr": [29, 34, 39, 42, 56, 58, 59], "record": 29, "high": [29, 31, 32, 33, 35, 38, 46, 47, 50], "level": [29, 38, 40, 41, 42, 45, 46, 47, 50, 59], "percentil": 29, "precis": 29, "visual": [29, 54], "debugg": 29, "under": [30, 39, 47, 48, 54], "trigger": 30, "manual": 30, "workflow_dispatch": 30, "push": 30, "branch": 30, "pull": [30, 34, 44], "pull_request": 30, "15am": 30, "schedul": [30, 45, 47], "cron": 30, "15": [30, 38, 41, 52], "everyth": 30, "align": 30, "machin": [30, 32], "20": [30, 39, 41, 46], "term": [30, 45], "matrix": 30, "fail": [30, 54, 59], "fast": [30, 37, 47], "indic": [30, 56], "One": 30, "v3": 30, "architectur": [30, 32], "x84": 30, "zulu": 30, "shell": 30, "codecov": 30, 
"chmod": 30, "streampredict": [31, 35], "registermodel": [31, 39], "unregistermodel": [31, 39], "free": [31, 39, 47, 58], "scalework": [31, 39], "dynam": [31, 32, 34, 35, 39, 40, 46], "better": [31, 35, 39, 45, 47], "listmodel": [31, 39], "queri": [31, 39, 41, 58], "describemodel": [31, 39], "setdefault": [31, 39], "googleapi": 31, "stub": 31, "third_parti": 31, "intermedi": [31, 33, 35], "until": [31, 33, 35, 39], "forc": [31, 35], "inferenceapisservic": [31, 33, 35], "torchservehealthrespons": [31, 35], "predictionsrequest": [31, 35], "predictionrespons": [31, 35], "style": [31, 33, 35], "handler_util": [31, 33, 35], "v1": [31, 33, 35, 39], "rang": [31, 33, 35, 40], "intermediate_respons": [31, 33, 35], "success": [31, 33, 35, 38, 51], "hello": [31, 33, 35, 37], "world": [31, 33, 35], "llama": [32, 37], "inferentia2": [32, 37], "naver": 32, "studi": [32, 47], "transit": 32, "cost": [32, 46, 47], "intel": [32, 46], "oneapi": 32, "softwar": 32, "sagemak": [32, 34], "75": 32, "four": [32, 45], "tpuv5": 32, "monitor": 32, "datadog": 32, "anim": [32, 47], "draw": [32, 47], "walmart": 32, "search": 32, "grok": 32, "principl": [32, 46], "inferentia": 32, "children": 32, "life": 32, "evolut": 32, "cresta": 32, "migrat": 32, "quantit": 32, "comparison": 32, "platform": [32, 34, 48], "indepth": 32, "why": [32, 50], "best": [32, 47], "practic": 32, "improv": [32, 36, 45, 46, 47], "perfrom": 32, "view": [32, 33, 39, 41, 46, 50], "torcherv": 32, "frequent": 32, "question": 32, "correct": [33, 39, 53], "swagger": [33, 39, 49], "codegen": [33, 39, 49], "maxretrytimeoutinsec": 33, "5min": 33, "recov": 33, "dead": 33, "activ": [33, 35, 44, 45, 46, 51], "unhealthi": 33, "less": 33, "resnet": [33, 38, 42, 44, 50], "f": [33, 37, 56], "squeezenet1_1": [33, 39, 42], "dog": [33, 44, 50], "open": [33, 34, 35, 47, 53], "rb": 33, "n02123045": 33, "42514491081237793": 33, "chunk": 33, "test_echo_stream_infer": [33, 35], "start_torchserv": [33, 35], "no_config_snapshot": [33, 35], 
"gen_mar": [33, 35], "register_model": [33, 35], "echo_stream": [33, 35], "tf_inference_api": [33, 35], "foo": [33, 35], "assert": [33, 35], "transfer": [33, 35], "iter_cont": [33, 35], "chunk_siz": [33, 35], "append": [33, 35, 54], "unregister_model": [33, 35], "test_data": 33, "png": [33, 44], "004570948731989492": 33, "006216969640322402": 33, "008197565423679522": 33, "009563574612830427": 33, "008999274832810742": 33, "009673474804303854": 33, "007599905146155397": 33, "kf_request_json": 33, "spec": 33, "grade": 34, "track": [34, 46, 47], "those": [34, 42, 47, 51], "weight": [34, 43, 47, 50], "compon": [34, 46, 54], "portion": 34, "lifecycl": 34, "actual": [34, 50], "script_modul": 34, "eager_mode_model": 34, "along": [34, 35, 48, 50, 56], "storag": 34, "authz": 34, "authn": 34, "drop": [34, 47], "loadabl": 34, "dockerfil": [34, 37], "experiment": 34, "k8": 34, "serving_sdk": 34, "stuff": 34, "termin": [34, 38, 39, 54], "easier": [34, 47], "kfserv": 34, "startserv": 34, "8903ca1fb059eab3c1e8eccdee1376d4ff52fb67": 34, "workerstatelisten": 34, "close": 34, "workerst": 34, "workloadmanag": 34, "concurr": [34, 45, 46, 47], "hashmap": 34, "backendgroup": 34, "threadpool": 34, "executor": 34, "pool": 34, "task": 34, "batchaggreg": 34, "apiutil": 34, "configmanag": [34, 54], "split": [35, 59], "partit": 35, "vllm": [35, 37], "speed": [35, 46, 47], "pp": 35, "accommod": 35, "torchrun": 35, "paralleltyp": 35, "tp": 35, "pptp": 35, "leav": 35, "nproc": 35, "OR": [35, 40, 56], "parallellevel": 35, "visibl": [35, 37], "suppos": 35, "eight": [35, 45], "worker1": 35, "worker2": 35, "illustr": 35, "stage": 35, "microbatch": 35, "inherit": [35, 39, 43], "our": [35, 37, 45], "custom_handl": 35, "base_pippy_handl": 35, "basepippyhandl": 35, "pt_pippi": 35, "initialize_rpc_work": 35, "get_pipline_driv": 35, "super": [35, 38, 50], "local_rank": 35, "device_count": 35, "world_siz": 35, "rout": 35, "rank0": 35, "rank": 35, "fontend": 35, "wish": [35, 51], "input_nam": 35, 
"input_id": 35, "fx": 35, "trace": 35, "model_typ": 35, "hf": [35, 37], "wise": 35, "blank": 35, "rpc_timeout": 35, "1800": 35, "num_worker_thread": 35, "max_length": 35, "80": 35, "length": [35, 39, 46], "bloom": 35, "pippy_handl": 35, "model_checkpoints_path": 35, "tgz": 35, "progress": 35, "soon": 35, "microsoft": [35, 54], "get_ds_engin": 35, "base_deepspeed_handl": 35, "basedeepspeedhandl": 35, "ds_engin": 35, "ds": 35, "filenam": [35, 38], "dtype": 35, "float16": 35, "replace_with_kernel_inject": 35, "tensor_parallel": 35, "tp_size": 35, "method1": 35, "method2": 35, "ds_build_op": 35, "deepspeed_handl": 35, "advantag": 35, "further": [35, 37, 38, 40, 41], "maxim": 35, "appli": [35, 47], "low_cpu_mem_usag": 35, "checkpoint": 35, "pretrain": [35, 56], "hub": [35, 37, 56], "cach": [35, 37, 40], "caus": 35, "omp_number_thread": 35, "immedi": [35, 39], "incur": 35, "503": 35, "chatgpt": 35, "effect": [35, 46], "action": 35, "reject": 35, "capac": 35, "busi": 35, "jobqueues": 35, "usejobticket": 35, "graviton": 36, "m7g": 36, "4x": 36, "dnnl_default_fpmath_mod": 36, "bf16": 36, "lru_cache_capac": 36, "1024": 36, "synthesi": 36, "meta": [37, 47], "llama3": 37, "besid": 37, "offer": [37, 45, 46, 48], "volum": [37, 56], "faster": [37, 43, 44, 46], "reload": [37, 41], "gate": 37, "huggingface_hub_token": 37, "ahead": [37, 40], "ti": 37, "shm": 37, "1g": 37, "hugging_face_hub_token": 37, "model_id": 37, "8b": 37, "instruct": [37, 41, 47, 54, 56], "disable_token_auth": 37, "exchang": 37, "my": 37, "max_new_token": 37, "samplingparam": 37, "keyword": 37, "temperatur": 37, "renam": 37, "some_nam": [37, 40], "outsid": [37, 47], "llm_launcher": 37, "3rd": 37, "parti": 37, "team": 37, "7b": 37, "chat": 37, "mistralai": 37, "mistral": 37, "v0": [37, 38, 39], "varieti": 37, "bot": 37, "gpt": 37, "understand": [38, 46, 48, 56], "layout": 38, "familiar": 38, "log4j": [38, 50], "rollingfil": 38, "access_log": [38, 42], "env": [38, 53, 54], "filepattern": 38, "dd": 38, "mmm": 38, 
"patternlayout": 38, "iso8601": 38, "sizebasedtriggeringpolici": 38, "timebasedtriggeringpolici": 38, "defaultrolloverstrategi": 38, "2018": [38, 39], "13": [38, 41], "56": 38, "976": 38, "backendwork": 38, "64003": 38, "118": 38, "remot": [38, 47, 49, 50], "took": 38, "ts_log": 38, "5p": 38, "stderr": 38, "14": 38, "46": [38, 42], "51": 38, "656": 38, "workerlifecycl": 38, "nnvm": 38, "legacy_json_util": 38, "cc": 38, "209": 38, "symbol": 38, "upgrad": 38, "657": 38, "217": [38, 44], "successfulli": [38, 56, 58], "59": 38, "926": 38, "60": [38, 53], "117": 38, "31": [38, 42], "52": 38, "dlog4j": 38, "configurationfil": 38, "altern": [38, 39], "lightweight": 38, "consid": [38, 45], "recent": 38, "lost": 38, "unexpectedli": 38, "decreas": [39, 45], "refresh": 39, "delet": [39, 42, 53, 58], "managementapisservic": 39, "subfold": [39, 58], "module_nam": 39, "method_nam": 39, "least": 39, "synchron": [39, 56], "acknowledg": 39, "onlin": 39, "response_timeout": 39, "respond": 39, "period": [39, 40], "squeezenet_v1": [39, 50], "sse": 39, "secret": 39, "region": 39, "squeezenet1": 39, "account": [39, 40], "iam": 39, "role": 39, "awss3readonlyaccess": 39, "s3_sse_km": 39, "forgot": 39, "between": [39, 45, 53], "202": 39, "4dc54158": 39, "c6de": 39, "42aa": 39, "b5dd": 39, "ebcb5f721043": 39, "47": [39, 42], "aliv": 39, "ok": 39, "ecd2e502": 39, "382f": 39, "4c3b": 39, "b425": 39, "519fbf6d3b85": 39, "89": 39, "min_work": [39, 56], "max_work": 39, "pend": 39, "exceed": 39, "flavor": 39, "42adc58": 39, "6956": 39, "4198": 39, "ad07": 39, "db6c620c4c1e": 39, "b72b1ea0": 39, "81c6": 39, "4cce": 39, "92c4": 39, "530d3cfe5d4a": 39, "63": 39, "3997ccd4": 39, "ae44": 39, "4570": 39, "b249": 39, "e361b08d3d47": 39, "77": 39, "02t13": 39, "53": 39, "034z": 39, "89247744": 39, "jobqueuestatu": 39, "remainingcapac": 39, "pendingrequest": 39, "noop_2": 39, "metadata": [39, 50], "output_describ": 39, "_is_describ": 39, "start_tim": [39, 40], "is_profiler_en": 39, "enable_torch_profil": 
[39, 47], "_infer_with_profil": [39, 47], "stop_tim": 39, "handlertim": [39, 40, 41], "customizedmetadata": 39, "9010": 39, "2022": 39, "08t11": 39, "974z": 39, "98972": 39, "data1": 39, "data2": 39, "next_page_token": [39, 58], "next": [39, 45, 54, 56, 58], "pagin": [39, 58], "nextpagetoken": [39, 58], "noop_v0": 39, "bearer": [39, 53], "expir": [39, 53], "broadli": 40, "minut": [40, 53], "metrics_mod": [40, 41], "ts_metrics_mod": 40, "agent": 40, "log_directori": 40, "ts_metric": [40, 42], "model_metr": 40, "releas": [40, 46], "ts_inference_requests_tot": [40, 41], "ts_inference_latency_microsecond": [40, 41], "ts_queue_latency_microsecond": [40, 41], "my_model": 40, "chosen": 40, "model_metrics_auto_detect": 40, "ts_model_metrics_auto_detect": 40, "cold": 40, "subsequ": 40, "often": [40, 46], "mitig": [40, 46], "terminolog": 40, "alias": 40, "nameofcountermetr": 40, "gaug": [40, 41], "nameofgaugemetr": 40, "histogram": 40, "nameofhistogrammetr": 40, "inferencetimeinm": 40, "numberofmetr": 40, "gaugemodelmetricnameexampl": 40, "histogrammodelmetricnameexampl": 40, "toward": 40, "percent": [40, 41], "metrictyp": 40, "semant": 40, "requests2xx": [40, 41, 42], "hostnam": [40, 41, 42], "total": 40, "300": 40, "requests4xx": [40, 41, 42], "400": 40, "requests5xx": [40, 41], "microsecond": [40, 41], "queuetim": [40, 41], "spent": 40, "workerthreadtim": [40, 41], "exclud": 40, "workerloadtim": [40, 41], "workernam": [40, 41], "taken": [40, 51, 56], "cpuutil": [40, 41], "memoryus": [40, 41], "megabyt": [40, 41], "memoryavail": [40, 41], "memoryutil": [40, 41], "diskusag": [40, 41], "gigabyt": [40, 41], "diskutil": [40, 41], "diskavail": [40, 41], "gpumemoryutil": [40, 41], "gpumemoryus": [40, 41], "gpuutil": [40, 41], "predictiontim": [40, 41], "confus": [40, 41], "fetch": [40, 41], "coupl": 40, "name_of_model": 40, "dim1": 40, "dim2": 40, "some_valu": 40, "dimn": 40, "name_n": 40, "value_n": 40, "add_metric_to_cach": 40, "dimension_nam": 40, "metric_typ": 40, 
"cachingmetr": 40, "newli": 40, "add_or_upd": 40, "dimension_valu": 40, "distance_metr": 40, "distanceinkm": 40, "distanc": 40, "inferencetim": 40, "end_tim": 40, "sizeofimag": 40, "img_siz": 40, "utilization_perc": 40, "callcount": 40, "call_count": 40, "get_metr": 40, "metricscachekeyerror": 40, "gauge_metr": 40, "gaugemetricnam": 40, "gauge_metric_valu": 40, "88665a372f4b": 41, "ant": 41, "054508209228516": 41, "resnet18": [41, 44], "83": 41, "resnet18_1": 41, "4593": 41, "9001": 41, "4592": 41, "5829": 41, "7421875": 41, "82": 41, "93": 41, "290371": 41, "129": 41, "8245": 41, "62109375": 41, "325": 41, "05113983154297": 41, "64": 41, "globoff": 41, "yml": 41, "scrape_interv": 41, "evaluation_interv": 41, "scrape_config": 41, "job_nam": 41, "static_config": 41, "9090": 41, "navig": 41, "graph": [41, 47], "3000": 41, "systemctl": 41, "daemon": [41, 45], "abil": 42, "unintend": 42, "scenario": [42, 45, 47, 53], "ts_enable_model_api": 42, "switch": 42, "enable_model_api": 42, "At": [42, 47, 54], "explicitli": [42, 47], "unabl": 42, "05": 42, "30t21": 42, "625": 42, "epolleventloopgroup": 42, "53514": 42, "405": 42, "626": 42, "timestamp": [42, 51], "1717105563": 42, "methodnotallowedexcept": 42, "41": [42, 44], "098": 42, "36270": 42, "099": 42, "1717105307": 42, "flowchart": 43, "simplifi": [43, 48], "td": 43, "id1": 43, "id13": 43, "id2": 43, "id3": 43, "id4": 43, "id5": 43, "onnx": [43, 47], "id6": 43, "tensorrt": [43, 46, 47], "id7": 43, "id8": 43, "id9": 43, "id10": 43, "id11": 43, "id21": 43, "id20": 43, "id15": 43, "id16": 43, "id14": 43, "id22": 43, "archiev": 43, "id17": 43, "propos": 44, "inclus": 44, "thank": 44, "216": 44, "106": 44, "489": 44, "squeezenet": [44, 50], "1_1": 44, "152": 44, "214": 44, "rcnn": 44, "coco": 44, "148": 44, "person": 44, "mask": 44, "158": 44, "ag_new": 44, "169": 44, "sample_text": 44, "fcn": 44, "101": 44, "193": 44, "deeplabv3": 44, "384": 44, "386": 44, "105": 44, "42": 44, "215": 44, "mmf": 44, "recognit": [44, 50], 
"charad": 44, "549": 44, "372cc": 44, "mp4": 44, "mgpu": 44, "sample_text_captum_input": [44, 45], "breed": 44, "war": [44, 58, 59], "spin": 45, "thu": [45, 46, 53], "kernel": [45, 47], "drawback": [45, 47], "occupi": [45, 47], "leverag": [45, 47], "benefit": [45, 46], "smi": [45, 46, 54, 56], "exclus": 45, "shutdown": [45, 51], "echo": [45, 55], "quit": 45, "48": 45, "volta": 45, "lead": 45, "failur": [45, 59], "decis": 45, "repres": 45, "workload": [45, 47], "primarili": 45, "investig": 45, "evolv": 45, "g4dn": 45, "4xlarg": 45, "2xlarg": 45, "concentr": 45, "measur": 45, "ab": 45, "still": 45, "left": 45, "five": 45, "median": 45, "overwrit": [45, 47], "accordingli": 45, "huggingface_transform": 45, "bertseqclassif": 45, "10000": [45, 59], "600": 45, "seq_classification_artifact": 45, "skew": [45, 48], "therefor": 45, "neglect": 45, "figur": 45, "steadi": 45, "wors": 45, "interpret": 45, "experi": 45, "bigger": [45, 47], "With": [45, 56], "diminish": 45, "25": 45, "saw": 45, "mix": 45, "certain": 45, "suggest": [45, 46], "highli": [45, 47], "situat": 45, "sole": 45, "focus": [45, 50], "pack": [45, 50], "diagnos": 46, "nlp": 46, "pad": 46, "too": 46, "driver": 46, "oftentim": 46, "bug": 46, "overal": 46, "htop": 46, "obviou": 46, "biggest": 46, "bottleneck": [46, 47], "beyond": 46, "quantifi": 46, "slow": 46, "scope": 46, "async": 46, "hide": 46, "constant": 46, "unnecessarili": 46, "magnitud": 46, "short": [46, 50], "jpeg": 46, "libjpeg": 46, "turbo": 46, "simd": 46, "dali": [46, 47], "old": 46, "k80": 46, "factor": 46, "low": [46, 47], "hang": 46, "fruit": 46, "evalu": 46, "inference_mod": 46, "bump": 46, "fp16": 46, "doubl": 46, "newer": 46, "neglig": 46, "accuraci": 46, "degrad": 46, "technic": 46, "quantiz": [46, 47], "seldom": 46, "suffer": 46, "loss": 46, "explor": 46, "articl": [46, 47], "neural": 46, "int8": 46, "compressor": 46, "sophist": 46, "worth": 46, "signific": 46, "right": [46, 54], "approach": [46, 47], "balanc": 46, "smart": 46, "meet": 46, 
"sla": 46, "onnxruntim": 46, "lightseq": 46, "ctranslat": 46, "fusion": [46, 47], "distil": [46, 47], "gain": 46, "minilm": 46, "99": 46, "origin": 46, "2x": [46, 47], "sort": 46, "unnecessari": [46, 47], "exhaust": 46, "squeez": 46, "checklist": 47, "trick": 47, "prune": 47, "encourag": 47, "hard": 47, "easiest": 47, "ort": 47, "fastertransform": 47, "favorit": 47, "feel": 47, "pr": 47, "8x": 47, "basi": 47, "10x": 47, "regard": 47, "genai": 47, "link": [47, 50, 56], "acceler": 47, "bit": 47, "ort_sess": 47, "inferencesess": 47, "sess_opt": 47, "treat": 47, "vs": 47, "nn": 47, "transformerencod": 47, "bettertransform": 47, "exce": 47, "speedup": 47, "vari": 47, "batch_delai": 47, "lower": 47, "heavi": 47, "On": 47, "significantli": 47, "theori": 47, "discuss": [47, 50], "quickli": 47, "summar": 47, "hyperthread": 47, "affin": 47, "physic": 47, "numa": 47, "cross": 47, "ex": [47, 50, 53, 54], "contrari": 47, "clearli": 47, "sub": 47, "valuetoset": 47, "uniqu": 47, "around": 47, "portabl": 47, "iter": 47, "popular": 47, "block": 47, "helper": 47, "p50": 47, "p90": 47, "p99": 47, "visit": 47, "app": [47, 50, 54], "fine": [47, 54], "scientist": 48, "analyt": 48, "though": 48, "underli": 48, "plain": 48, "crucial": 48, "assum": [48, 56], "service_envelop": 48, "scala": 49, "javascript": 49, "snap": 50, "pictur": 50, "scene": 50, "identif": 50, "intak": 50, "littl": 50, "weed": 50, "dive": 50, "kind": [50, 56], "themselv": 50, "long": 50, "stori": 50, "ts_config": 50, "model_path1": 50, "model_path2": 50, "log_config": 50, "exit": 50, "model_loc": 50, "talk": 50, "sai": 50, "fanci": 50, "net": 50, "hotdog": 50, "sens": 50, "hot": 50, "name2": 50, "model_location2": 50, "compris": [51, 59], "cfg": 51, "shut": [51, 53], "intention": 51, "resili": 51, "katex": 52, "recommonmark": 52, "sphinxcontrib": 52, "pyyaml": 52, "theme": 52, "ts_disable_token_author": 53, "auth": 53, "disable_token_author": 53, "preced": 53, "tokenauthorizationhandl": 53, "key_fil": 53, "e5ksrm": 53, 
"16t21": 53, "24": 53, "801167z": 53, "gnrua7d": 53, "801148z": 53, "yv9uqajp": 53, "i_j_itmb": 53, "finhr1fj": 53, "m4m": 53, "5ibi": 53, "THE": 53, "displai": 53, "token_expiration_min": 53, "30": 53, "owner": 53, "adapt": 53, "certifi": 54, "2019": [54, 55], "admin": 54, "anaconda": 54, "powershel": 54, "openjdk17": 54, "unzip": 54, "edit": 54, "jdk": [54, 55], "3_window": 54, "x64_bin": 54, "gui": [54, 56], "wheel": 54, "prebuilt": 54, "click": 54, "whl": 54, "java_hom": 54, "redistribut": 54, "studio": 54, "2015": 54, "2017": 54, "your_install_dr": 54, "program": 54, "corpor": 54, "nvsmi": 54, "prod": 54, "setup_wsl_ubuntu": 55, "bash": 55, "bashrc": 55, "openjdk": 55, "sentencepiec": 55, "conveni": 56, "prerequisit": 56, "chrome": 56, "default_handl": 56, "your_model_nam": 56, "your_model_fil": 56, "mv": 56, "directli": 56, "copi": 56, "your_model_store_dir": 56, "kept": 56, "min": [56, 59], "your_input_fil": 56, "demostr": 56, "public_url": [56, 58], "your_path": 56, "8433": 56, "local_dir": 56, "your_docker_imag": 56, "s3_path": 56, "notic": 56, "proerti": 56, "apart": 56, "lib": 56, "your_custom_handler_py_fil": 56, "fairseq": 56, "your_requirements_txt": 56, "anoth": 56, "wfpredict": 57, "workflow_nam": [57, 58], "myworkflow": [57, 58], "leaf": 57, "dag": [57, 58], "workflownam": 58, "workflowurl": 58, "workflowdag": 58, "m1": [58, 59], "myworkflow1": 58, "myworkflow2": 58, "ensembl": 59, "flow": 59, "m2": 59, "m3": 59, "retri": 59, "model3": 59, "m4": 59, "model4": 59, "pre_process": 59, "syntax": 59, "ridden": 59, "dataflow": 59, "eg": 59, "function1": 59, "function2": 59, "aggregate_func": 59, "workflow_inference_api": 59, "workflow_management_api": 59, "serializ": 59, "arrai": 59}, "objects": {"": [[8, 0, 0, "-", "test"], [10, 0, 0, "-", "ts"]], "ts": [[10, 0, 0, "-", "arg_parser"], [10, 0, 0, "-", "context"], [11, 0, 0, "-", "metrics"], [10, 0, 0, "-", "model_loader"], [10, 0, 0, "-", "model_server"], [12, 0, 0, "-", "model_service"], [10, 0, 0, "-", 
"model_service_worker"], [13, 0, 0, "-", "protocol"], [10, 0, 0, "-", "service"], [14, 0, 0, "-", "torch_handler"], [19, 0, 0, "-", "utils"], [10, 0, 0, "-", "version"]], "ts.arg_parser": [[10, 1, 1, "", "ArgParser"]], "ts.arg_parser.ArgParser": [[10, 2, 1, "", "extract_args"], [10, 2, 1, "", "model_service_worker_args"], [10, 2, 1, "", "ts_parser"]], "ts.context": [[10, 1, 1, "", "Context"], [10, 1, 1, "", "RequestProcessor"]], "ts.context.Context": [[10, 2, 1, "", "get_all_request_header"], [10, 2, 1, "", "get_request_header"], [10, 2, 1, "", "get_request_id"], [10, 2, 1, "", "get_response_content_type"], [10, 2, 1, "", "get_response_headers"], [10, 2, 1, "", "get_response_status"], [10, 2, 1, "", "get_sequence_id"], [10, 3, 1, "", "metrics"], [10, 3, 1, "", "request_processor"], [10, 2, 1, "", "set_all_response_status"], [10, 2, 1, "", "set_response_content_type"], [10, 2, 1, "", "set_response_header"], [10, 2, 1, "", "set_response_status"], [10, 3, 1, "", "system_properties"]], "ts.context.RequestProcessor": [[10, 2, 1, "", "add_response_property"], [10, 2, 1, "", "get_request_properties"], [10, 2, 1, "", "get_request_property"], [10, 2, 1, "", "get_response_header"], [10, 2, 1, "", "get_response_headers"], [10, 2, 1, "", "get_response_status_code"], [10, 2, 1, "", "get_response_status_phrase"], [10, 2, 1, "", "report_status"]], "ts.metrics": [[11, 0, 0, "-", "dimension"], [11, 0, 0, "-", "metric"], [11, 0, 0, "-", "metric_collector"], [11, 0, 0, "-", "metric_encoder"], [11, 0, 0, "-", "metrics_store"], [11, 0, 0, "-", "process_memory_metric"], [11, 0, 0, "-", "system_metrics"], [11, 0, 0, "-", "unit"]], "ts.metrics.dimension": [[11, 1, 1, "", "Dimension"]], "ts.metrics.dimension.Dimension": [[11, 2, 1, "", "to_dict"]], "ts.metrics.metric": [[11, 1, 1, "", "Metric"]], "ts.metrics.metric.Metric": [[11, 2, 1, "", "reset"], [11, 2, 1, "", "to_dict"], [11, 2, 1, "", "update"]], "ts.metrics.metric_encoder": [[11, 1, 1, "", "MetricEncoder"]], 
"ts.metrics.metric_encoder.MetricEncoder": [[11, 2, 1, "", "default"]], "ts.metrics.metrics_store": [[11, 1, 1, "", "MetricsStore"]], "ts.metrics.metrics_store.MetricsStore": [[11, 2, 1, "", "add_counter"], [11, 2, 1, "", "add_error"], [11, 2, 1, "", "add_metric"], [11, 2, 1, "", "add_percent"], [11, 2, 1, "", "add_size"], [11, 2, 1, "", "add_time"]], "ts.metrics.process_memory_metric": [[11, 4, 1, "", "check_process_mem_usage"], [11, 4, 1, "", "get_cpu_usage"]], "ts.metrics.system_metrics": [[11, 4, 1, "", "collect_all"], [11, 4, 1, "", "cpu_utilization"], [11, 4, 1, "", "disk_available"], [11, 4, 1, "", "disk_used"], [11, 4, 1, "", "disk_utilization"], [11, 4, 1, "", "gpu_utilization"], [11, 4, 1, "", "memory_available"], [11, 4, 1, "", "memory_used"], [11, 4, 1, "", "memory_utilization"]], "ts.metrics.unit": [[11, 1, 1, "", "Units"]], "ts.model_loader": [[10, 1, 1, "", "ModelLoader"], [10, 1, 1, "", "ModelLoaderFactory"], [10, 1, 1, "", "TsModelLoader"]], "ts.model_loader.ModelLoader": [[10, 2, 1, "", "load"]], "ts.model_loader.ModelLoaderFactory": [[10, 2, 1, "", "get_model_loader"]], "ts.model_loader.TsModelLoader": [[10, 2, 1, "", "load"]], "ts.model_server": [[10, 4, 1, "", "load_properties"], [10, 4, 1, "", "start"]], "ts.model_service": [[12, 0, 0, "-", "model_service"]], "ts.model_service.model_service": [[12, 1, 1, "", "ModelService"], [12, 1, 1, "", "SingleNodeService"]], "ts.model_service.model_service.ModelService": [[12, 2, 1, "", "handle"], [12, 2, 1, "", "inference"], [12, 2, 1, "", "initialize"], [12, 2, 1, "", "ping"], [12, 2, 1, "", "signature"]], "ts.model_service.model_service.SingleNodeService": [[12, 2, 1, "", "inference"]], "ts.model_service_worker": [[10, 1, 1, "", "TorchModelServiceWorker"]], "ts.model_service_worker.TorchModelServiceWorker": [[10, 2, 1, "", "handle_connection"], [10, 2, 1, "", "load_model"], [10, 2, 1, "", "run_server"]], "ts.protocol": [[13, 0, 0, "-", "otf_message_handler"]], "ts.protocol.otf_message_handler": [[13, 4, 
1, "", "create_load_model_response"], [13, 4, 1, "", "create_predict_response"], [13, 4, 1, "", "encode_response_headers"], [13, 4, 1, "", "retrieve_msg"], [13, 4, 1, "", "send_intermediate_predict_response"]], "ts.service": [[10, 1, 1, "", "Service"], [10, 4, 1, "", "emit_metrics"]], "ts.service.Service": [[10, 3, 1, "", "context"], [10, 2, 1, "", "predict"], [10, 2, 1, "", "retrieve_data_for_inference"], [10, 2, 1, "", "set_cl_socket"]], "ts.torch_handler": [[14, 0, 0, "-", "base_handler"], [14, 0, 0, "-", "contractions"], [14, 0, 0, "-", "densenet_handler"], [14, 0, 0, "-", "image_classifier"], [14, 0, 0, "-", "image_segmenter"], [14, 0, 0, "-", "object_detector"], [15, 0, 0, "-", "request_envelope"], [14, 0, 0, "-", "text_classifier"], [14, 0, 0, "-", "text_handler"], [16, 0, 0, "-", "unit_tests"], [14, 0, 0, "-", "vision_handler"]], "ts.torch_handler.base_handler": [[14, 1, 1, "", "BaseHandler"], [14, 4, 1, "", "setup_ort_session"]], "ts.torch_handler.base_handler.BaseHandler": [[14, 2, 1, "", "describe_handle"], [14, 2, 1, "", "explain_handle"], [14, 2, 1, "", "get_device"], [14, 2, 1, "", "handle"], [14, 2, 1, "", "inference"], [14, 2, 1, "", "initialize"], [14, 2, 1, "", "postprocess"], [14, 2, 1, "", "preprocess"]], "ts.torch_handler.densenet_handler": [[14, 1, 1, "", "DenseNetHandler"], [14, 4, 1, "", "list_classes_from_module"]], "ts.torch_handler.densenet_handler.DenseNetHandler": [[14, 2, 1, "", "handle"], [14, 2, 1, "", "inference"], [14, 2, 1, "", "initialize"]], "ts.torch_handler.image_classifier": [[14, 1, 1, "", "ImageClassifier"]], "ts.torch_handler.image_classifier.ImageClassifier": [[14, 2, 1, "", "get_max_result_classes"], [14, 5, 1, "", "image_processing"], [14, 2, 1, "", "postprocess"], [14, 2, 1, "", "set_max_result_classes"], [14, 5, 1, "", "topk"]], "ts.torch_handler.image_segmenter": [[14, 1, 1, "", "ImageSegmenter"]], "ts.torch_handler.image_segmenter.ImageSegmenter": [[14, 5, 1, "", "image_processing"], [14, 2, 1, "", "postprocess"]], 
"ts.torch_handler.object_detector": [[14, 1, 1, "", "ObjectDetector"]], "ts.torch_handler.object_detector.ObjectDetector": [[14, 5, 1, "", "image_processing"], [14, 2, 1, "", "initialize"], [14, 2, 1, "", "postprocess"], [14, 5, 1, "", "threshold"]], "ts.torch_handler.request_envelope": [[15, 0, 0, "-", "base"], [15, 0, 0, "-", "body"], [15, 0, 0, "-", "json"], [15, 0, 0, "-", "kserve"], [15, 0, 0, "-", "kservev2"]], "ts.torch_handler.request_envelope.base": [[15, 1, 1, "", "BaseEnvelope"]], "ts.torch_handler.request_envelope.base.BaseEnvelope": [[15, 2, 1, "", "format_output"], [15, 2, 1, "", "handle"], [15, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.body": [[15, 1, 1, "", "BodyEnvelope"]], "ts.torch_handler.request_envelope.body.BodyEnvelope": [[15, 2, 1, "", "format_output"], [15, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.json": [[15, 1, 1, "", "JSONEnvelope"]], "ts.torch_handler.request_envelope.json.JSONEnvelope": [[15, 2, 1, "", "format_output"], [15, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.kserve": [[15, 1, 1, "", "KServeEnvelope"]], "ts.torch_handler.request_envelope.kserve.KServeEnvelope": [[15, 2, 1, "", "format_output"], [15, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.kservev2": [[15, 1, 1, "", "KServev2Envelope"]], "ts.torch_handler.request_envelope.kservev2.KServev2Envelope": [[15, 2, 1, "", "format_output"], [15, 2, 1, "", "parse_input"]], "ts.torch_handler.text_classifier": [[14, 1, 1, "", "TextClassifier"]], "ts.torch_handler.text_classifier.TextClassifier": [[14, 2, 1, "", "get_insights"], [14, 2, 1, "", "inference"], [14, 5, 1, "", "ngrams"], [14, 2, 1, "", "postprocess"], [14, 2, 1, "", "preprocess"]], "ts.torch_handler.text_handler": [[14, 1, 1, "", "TextHandler"]], "ts.torch_handler.text_handler.TextHandler": [[14, 2, 1, "", "get_source_vocab_path"], [14, 2, 1, "", "get_word_token"], [14, 2, 1, "", "initialize"], [14, 2, 1, "", "summarize_attributions"]], 
"ts.torch_handler.unit_tests": [[17, 0, 0, "-", "models"], [16, 0, 0, "-", "test_base_handler"], [16, 0, 0, "-", "test_envelopes"], [16, 0, 0, "-", "test_image_classifier"], [16, 0, 0, "-", "test_image_segmenter"], [16, 0, 0, "-", "test_object_detector"], [18, 0, 0, "-", "test_utils"]], "ts.torch_handler.unit_tests.models": [[17, 0, 0, "-", "base_model"]], "ts.torch_handler.unit_tests.models.base_model": [[17, 1, 1, "", "ArgmaxModel"], [17, 4, 1, "", "save_pt_file"]], "ts.torch_handler.unit_tests.models.base_model.ArgmaxModel": [[17, 2, 1, "", "forward"], [17, 5, 1, "", "training"]], "ts.torch_handler.unit_tests.test_base_handler": [[16, 4, 1, "", "handler"], [16, 4, 1, "", "test_batch_handle"], [16, 4, 1, "", "test_inference_with_profiler_works_with_custom_initialize_method"], [16, 4, 1, "", "test_single_handle"]], "ts.torch_handler.unit_tests.test_envelopes": [[16, 4, 1, "", "handle_fn"], [16, 4, 1, "", "test_binary"], [16, 4, 1, "", "test_body"], [16, 4, 1, "", "test_json"], [16, 4, 1, "", "test_json_batch"], [16, 4, 1, "", "test_json_double_batch"]], "ts.torch_handler.unit_tests.test_image_classifier": [[16, 4, 1, "", "context"], [16, 4, 1, "", "handler"], [16, 4, 1, "", "image_bytes"], [16, 4, 1, "", "model_dir"], [16, 4, 1, "", "model_name"], [16, 4, 1, "", "test_handle"], [16, 4, 1, "", "test_handle_explain"]], "ts.torch_handler.unit_tests.test_image_segmenter": [[16, 4, 1, "", "context"], [16, 4, 1, "", "handler"], [16, 4, 1, "", "image_bytes"], [16, 4, 1, "", "model_dir"], [16, 4, 1, "", "model_name"], [16, 4, 1, "", "test_handle"]], "ts.torch_handler.unit_tests.test_object_detector": [[16, 4, 1, "", "context"], [16, 4, 1, "", "handler"], [16, 4, 1, "", "image_bytes"], [16, 4, 1, "", "model_dir"], [16, 4, 1, "", "model_name"], [16, 4, 1, "", "test_handle"]], "ts.torch_handler.unit_tests.test_utils": [[18, 0, 0, "-", "mock_context"]], "ts.torch_handler.unit_tests.test_utils.mock_context": [[18, 1, 1, "", "MockContext"]], 
"ts.torch_handler.unit_tests.test_utils.mock_context.MockContext": [[18, 2, 1, "", "get_request_header"]], "ts.torch_handler.vision_handler": [[14, 1, 1, "", "VisionHandler"]], "ts.torch_handler.vision_handler.VisionHandler": [[14, 2, 1, "", "get_insights"], [14, 2, 1, "", "initialize"], [14, 2, 1, "", "preprocess"]], "ts.utils": [[19, 0, 0, "-", "timeit_decorator"], [19, 0, 0, "-", "util"]], "ts.utils.timeit_decorator": [[19, 4, 1, "", "timeit"]], "ts.utils.util": [[19, 1, 1, "", "PT2Backend"], [19, 6, 1, "", "PredictionException"], [19, 4, 1, "", "check_valid_pt2_backend"], [19, 4, 1, "", "deprecated"], [19, 4, 1, "", "get_yaml_config"], [19, 4, 1, "", "list_classes_from_module"], [19, 4, 1, "", "load_label_mapping"], [19, 4, 1, "", "map_class_to_label"]], "ts.utils.util.PT2Backend": [[19, 5, 1, "", "AOT_CUDAGRAPHS"], [19, 5, 1, "", "AOT_EAGER"], [19, 5, 1, "", "AOT_NVFUSER"], [19, 5, 1, "", "EAGER"], [19, 5, 1, "", "FX2TRT"], [19, 5, 1, "", "INDUCTOR"], [19, 5, 1, "", "IPEX"], [19, 5, 1, "", "NVFUSER"], [19, 5, 1, "", "OFI"], [19, 5, 1, "", "ONNXRT"], [19, 5, 1, "", "OPENVINO"], [19, 5, 1, "", "TORCHXLA_TRACE_ONCE"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:property", "4": "py:function", "5": "py:attribute", "6": "py:exception"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "property", "Python property"], "4": ["py", "function", "Python function"], "5": ["py", "attribute", "Python attribute"], "6": ["py", "exception", "Python exception"]}, "titleterms": {"faq": 0, "s": [0, 23, 40], "gener": [0, 38, 40, 46], "doe": 0, "torchserv": [0, 1, 3, 23, 25, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 40, 43, 45, 47, 49, 50, 51, 53, 54, 55, 56, 59], "api": [0, 3, 4, 21, 23, 25, 26, 29, 31, 33, 39, 40, 41, 42, 49, 53, 57, 58], "follow": [0, 24], "some": 0, "rest": [0, 29, 49], "standard": 0, "how": [0, 2, 3, 35, 43, 53], "us": [0, 3, 22, 23, 29, 
35, 42, 54, 56], "product": [0, 54], "what": [0, 22, 34], "differ": 0, "between": 0, "python": [0, 3, 24, 25, 27, 29, 31, 34], "web": 0, "app": 0, "framework": 0, "like": 0, "flask": 0, "django": 0, "ar": [0, 3], "ani": 0, "sampl": 0, "model": [0, 3, 17, 23, 24, 25, 27, 29, 35, 37, 39, 40, 42, 43, 44, 46, 50, 51, 56, 59], "avail": 0, "support": [0, 2, 22, 23, 37], "other": [0, 25, 37], "base": [0, 15, 29, 40], "program": 0, "languag": 0, "than": 0, "benefit": 0, "have": [0, 3], "over": 0, "aw": 0, "multi": 0, "server": [0, 31, 35, 41], "decod": 0, "intern": [0, 34], "infer": [0, 3, 23, 28, 33, 35, 46, 57], "respons": [0, 35], "client": [0, 29, 31], "side": [0, 31, 35], "perform": [0, 47], "do": [0, 2, 34, 36], "i": [0, 3], "improv": 0, "cpu": 0, "deploy": [0, 3, 37], "config": [0, 3, 23, 25, 35, 38], "can": [0, 3], "run": [0, 24, 45, 50], "port": [0, 3, 25], "default": [0, 1, 23, 27, 28, 39, 40, 42], "8080": [0, 3], "8081": [0, 3], "resolv": [0, 3], "specif": [0, 3, 25, 27, 59], "depend": [0, 3, 24, 27, 56], "deploi": [0, 37, 56], "kubernet": 0, "elb": 0, "asg": 0, "backup": 0, "restor": 0, "state": 0, "build": [0, 24], "imag": 0, "from": [0, 27, 29, 54, 55], "sourc": [0, 54, 55], "branch": [0, 24], "commit": 0, "id": 0, "creat": [0, 27, 30, 40], "dockerfil": 0, "dev": 0, "order": 0, "properti": [0, 23, 25, 38, 59], "path": 0, "model_stor": 0, "load_model": 0, "curl": [0, 33, 57], "make": 0, "request": [0, 3, 27, 48], "add": [0, 3, 40], "custom": [0, 3, 25, 27, 38, 40, 50, 56], "an": [0, 27], "exist": 0, "pass": 0, "multipl": [0, 27, 50], "call": 0, "my": [0, 3], "handler": [0, 1, 23, 27, 28, 29, 59], "return": [0, 27], "output": 0, "enhanc": 0, "alwai": 0, "write": [0, 27], "ones": 0, "Is": 0, "possibl": 0, "hug": [0, 35], "face": [0, 35], "archiv": [0, 3, 23, 24, 27, 29], "mar": [0, 3], "file": [0, 3, 25, 34, 35, 50, 59], "docker": [0, 23], "contain": [0, 23], "serial": 0, "singl": 0, "download": 0, "regist": [0, 3, 39, 58], "s3": 0, "presign": 0, "v4": 0, "url": 
0, "host": 0, "set": [0, 39, 42, 53], "batch": [0, 23], "size": [0, 40], "sagemak": 0, "kei": 0, "paramet": [0, 25], "tune": [0, 35], "why": 0, "initi": 0, "so": 0, "slow": 0, "announc": 1, "secur": [1, 2, 56], "chang": [1, 3], "basic": 1, "featur": [1, 3, 28, 50], "exampl": [1, 22, 25, 31, 33, 36, 42, 57], "advanc": [1, 25, 27, 50], "polici": 2, "version": [2, 10, 39], "we": 2, "import": [2, 34], "guidelin": 2, "report": 2, "vulner": 2, "troubleshoot": [3, 54], "guid": [3, 47], "issu": [3, 52, 59], "fail": 3, "bind": 3, "address": [3, 25], "http": [3, 35, 52], "127": 3, "0": [3, 55], "1": [3, 35], "alreadi": 3, "java": [3, 34], "lang": 3, "nosuchmethoderror": 3, "when": 3, "start": [3, 27, 29, 40], "473": 3, "unabl": 3, "send": 3, "big": 3, "snapshot": [3, 51], "relat": [3, 59], "disabl": [3, 53], "stop": [3, 29], "after": 3, "restart": 3, "invalidsnapshotexcept": 3, "except": 3, "where": 3, "store": [3, 29], "temp": 3, "directori": [3, 34], "conflictstatusexcept": 3, "error": [3, 27], "code": [3, 24, 27, 29], "409": 3, "downloadmodelexcept": 3, "400": 3, "modelnotfoundexcept": 3, "404": 3, "serviceunavailableexcept": 3, "503": 3, "ad": 3, "requir": 3, "txt": 3, "packag": [3, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 24, 25], "list": [3, 39, 58], "get": [3, 29, 40], "instal": [3, 24, 27, 29, 36, 54, 55], "backend": [3, 24, 25, 34, 40], "worker": [3, 25, 39], "monitor": 3, "thread": 3, "interrupt": 3, "process": 3, "di": 3, "develop": [4, 26, 54], "serv": [5, 29, 34, 35, 39, 50, 56], "run_circleci_test": 6, "modul": [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 27], "setup": [7, 23, 55], "test": [8, 22, 56], "submodul": [8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], "regression_test": 8, "content": [8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 23, 26, 27, 40, 50, 54, 55], "torchserve_san": 9, "ts": [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 24], "subpackag": [10, 14, 16], "arg_pars": 10, "context": 10, "model_load": 10, "model_serv": 10, 
"model_service_work": 10, "servic": [10, 21, 26, 27, 50], "metric": [11, 25, 40, 41, 50], "dimens": [11, 40], "metric_collector": 11, "metric_encod": 11, "metrics_stor": 11, "process_memory_metr": 11, "system_metr": 11, "unit": 11, "model_servic": 12, "protocol": 13, "otf_message_handl": 13, "torch_handl": [14, 15, 16, 17, 18], "base_handl": 14, "contract": 14, "densenet_handl": 14, "image_classifi": [14, 28], "image_segment": [14, 28], "object_detector": [14, 28], "text_classifi": [14, 28], "text_handl": 14, "vision_handl": 14, "request_envelop": 15, "bodi": 15, "json": [15, 28], "kserv": [15, 33], "kservev2": 15, "unit_test": [16, 17, 18], "test_base_handl": 16, "test_envelop": 16, "test_image_classifi": 16, "test_image_segment": 16, "test_mnist_kf": 16, "test_object_detector": 16, "base_model": 17, "test_util": 18, "mock_context": 18, "util": [19, 20], "timeit_decor": 19, "ts_script": 20, "api_util": 20, "backend_util": 20, "frontend_util": 20, "install_depend": 20, "install_from_src": 20, "marsgen": 20, "modelarchiver_util": 20, "print_env_info": 20, "regression_util": 20, "sanity_util": 20, "shell_util": 20, "torchserve_grpc_cli": 20, "tsutil": 20, "validate_model_on_gpu": 20, "workflow_archiver_util": 20, "appl": 22, "silicon": 22, "experiment": [22, 36], "resnet": [22, 23], "18": [22, 55], "mp": [22, 45], "On": 22, "mac": 22, "m1": 22, "pro": 22, "conda": 22, "thi": [23, 27, 50, 54, 55], "document": [23, 27, 50, 54, 55], "introduct": [23, 40], "prerequisit": [23, 38, 54], "152": 23, "configur": [23, 25, 40, 53], "demo": 23, "torch": [23, 29], "manag": [23, 39, 58], "through": [23, 29], "coverag": 24, "To": [24, 36], "check": [24, 33], "stabil": 24, "saniti": 24, "suit": 24, "frontend": [24, 34, 40], "command": [24, 25, 50, 54], "pytest": 24, "lint": 24, "IT": 24, "markdown": [24, 52], "link": 24, "checker": 24, "environ": 25, "variabl": 25, "line": [25, 42, 50], "jvm": 25, "option": 25, "load": [25, 35, 43], "startup": 25, "listen": 25, "grpc": [25, 29, 31, 
35], "max": 25, "connect": 25, "ag": 25, "enabl": [25, 38, 42], "ssl": 25, "cross": 25, "origin": 25, "resourc": [25, 47], "share": 25, "cor": 25, "prefer": 25, "direct": 25, "buffer": 25, "allow": 25, "restrict": 25, "access": [25, 38], "limit": 25, "gpu": [25, 27, 56], "usag": [25, 48, 54], "nvidia": [25, 45], "control": [25, 42], "visibl": 25, "basehandl": 27, "level": 27, "entri": 27, "point": 27, "class": 27, "scratch": 27, "predict": [27, 29, 33, 57], "explan": [27, 33], "captum": 27, "extend": 27, "handl": 27, "execut": 27, "common": 28, "index_to_nam": 28, "contribut": [28, 29, 48], "For": [29, 54], "debian": 29, "system": [29, 46], "maco": 29, "window": [29, 54, 55], "inspect": 29, "log": [29, 38, 40, 50], "debug": 29, "github": [30, 52], "action": 30, "step": 30, "stream": [31, 35], "descript": [33, 39], "health": 33, "architectur": 34, "terminolog": 34, "pytorch": [34, 35, 47, 56], "thei": 34, "core": 34, "engin": 34, "larg": 35, "work": 35, "pippi": 35, "nativ": 35, "solut": 35, "deepspe": 35, "mii": 35, "acceler": 35, "tip": 35, "reduc": 35, "latenc": 35, "yaml": 35, "sensit": 35, "applic": 35, "job": 35, "ticket": 35, "via": 35, "chunk": 35, "encod": 35, "linux": [36, 55], "aarch64": 36, "optim": [36, 46, 47], "llm": 37, "quickstart": 37, "wai": [37, 42], "type": [38, 40], "modifi": 38, "behavior": 38, "provid": 38, "asynchron": 38, "encrypt": 39, "scale": 39, "describ": [39, 58], "unregist": [39, 58], "token": [39, 53], "author": [39, 53], "mode": [40, 42, 56], "prometheu": [40, 41], "legaci": 40, "auto": 40, "detect": 40, "format": 40, "object": 40, "function": 40, "without": 40, "time": 40, "percentag": 40, "counter": 40, "A": 40, "grafana": 41, "three": 42, "cmd": 42, "zoo": [44, 56], "benchmark": [45, 47], "g4": 45, "instanc": 45, "p3": 45, "summari": 45, "checklist": 46, "profil": 47, "more": 47, "envelop": 48, "overview": 50, "technic": 50, "detail": 50, "interfac": 50, "argument": 50, "prioriti": 50, "cf": 52, "com": 52, "ryanfox": 52, 
"sphinx": 52, "tabl": 52, "36": 52, "note": 53, "binari": [54, 55], "below": 54, "purpos": 54, "subsystem": 55, "wsl": 55, "ubuntu": 55, "4": 55, "case": 56, "eager": 56, "script": 56, "readymad": 56, "third": 56, "parti": 56, "ab": 56, "workflow": [57, 58, 59], "dag": 59, "sequenti": 59, "parallel": 59, "doc": 59, "known": 59}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 56}}) \ No newline at end of file