Commit

Merge branch 'master' into examples/yolov8
namannandan committed Aug 24, 2023
2 parents b1be076 + bb4eb8b commit 07bd3cc
Showing 55 changed files with 1,331 additions and 261 deletions.
5 changes: 3 additions & 2 deletions .pre-commit-config.yaml
@@ -12,6 +12,7 @@ repos:
- id: check-json
- id: check-toml
- id: check-yaml
args: [--allow-multiple-documents, --unsafe]
- id: end-of-file-fixer
- id: mixed-line-ending
- id: trailing-whitespace
@@ -24,12 +25,12 @@ repos:
- id: python-no-log-warn
- id: python-use-type-annotations
- repo: https://github.com/hadialqattan/pycln
rev: v2.1.3
rev: v2.1.5
hooks:
- id: pycln
args: [--all]
- repo: https://github.com/psf/black
rev: 23.1.0
rev: 23.7.0
hooks:
- id: black
additional_dependencies: ['click==8.0.4']
22 changes: 22 additions & 0 deletions benchmarks/benchmark-ab.py
@@ -30,6 +30,7 @@
    "image": "",
    "docker_runtime": "",
    "backend_profiling": False,
    "handler_profiling": False,
    "generate_graphs": False,
    "config_properties": "config.properties",
    "inference_model_url": "predictions/benchmark",
@@ -95,6 +96,12 @@ def json_provider(file_path, cmd_name):
    default=False,
    help="Enable backend profiling using CProfile. Default False",
)
@click.option(
    "--handler_profiling",
    "-hp",
    default=False,
    help="Enable handler profiling. Default False",
)
@click.option(
    "--generate_graphs",
    "-gg",
@@ -143,6 +150,7 @@ def benchmark(
    image,
    docker_runtime,
    backend_profiling,
    handler_profiling,
    config_properties,
    inference_model_url,
    report_location,
@@ -163,6 +171,7 @@
        "image": image,
        "docker_runtime": docker_runtime,
        "backend_profiling": backend_profiling,
        "handler_profiling": handler_profiling,
        "config_properties": config_properties,
        "inference_model_url": inference_model_url,
        "report_location": report_location,
@@ -469,13 +478,26 @@ def generate_report(warm_up_lines):
}


def update_metrics():
    if execution_params["handler_profiling"]:
        opt_metrics = {
            "handler_preprocess.txt": "ts_handler_preprocess",
            "handler_inference.txt": "ts_handler_inference",
            "handler_postprocess.txt": "ts_handler_postprocess",
        }
        metrics.update(opt_metrics)
    return metrics


def extract_metrics(warm_up_lines):
    with open(execution_params["metric_log"]) as f:
        lines = f.readlines()

    click.secho(f"Dropping {warm_up_lines} warmup lines from log", fg="green")
    lines = lines[warm_up_lines:]

    metrics = update_metrics()

    for k, v in metrics.items():
        all_lines = []
        pattern = re.compile(v)
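The practical effect of the new `--handler_profiling` flag is that three per-phase handler metrics are folded into the set of metrics the report extracts. Below is a minimal, self-contained sketch of that behaviour; the base `metrics` entry and the sample log line are illustrative assumptions, and only the three handler metric names come from the change above.

```python
import re

# Illustrative sketch: the base metric entry and the log line format are assumptions.
execution_params = {"handler_profiling": True}
metrics = {"predict.txt": "PredictionTime"}  # assumed base entry


def update_metrics():
    # The handler metrics are only collected when --handler_profiling is passed.
    if execution_params["handler_profiling"]:
        metrics.update(
            {
                "handler_preprocess.txt": "ts_handler_preprocess",
                "handler_inference.txt": "ts_handler_inference",
                "handler_postprocess.txt": "ts_handler_postprocess",
            }
        )
    return metrics


sample_line = "ts_handler_preprocess.Milliseconds:3.27|#ModelName:yolov8n"  # hypothetical log line
for out_file, metric_name in update_metrics().items():
    if re.compile(metric_name).search(sample_line):
        print(f"{metric_name} -> {out_file}")
```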
43 changes: 29 additions & 14 deletions benchmarks/utils/gen_model_config_json.py
@@ -2,11 +2,11 @@
import copy
import json
import os

import yaml


def main():

    parser = argparse.ArgumentParser()

    parser.add_argument(
@@ -22,6 +22,7 @@ def main():
    arguments = parser.parse_args()
    convert_yaml_to_json(arguments.input, arguments.output)


MODEL_CONFIG_KEY = {
"batch_size",
"batch_delay",
@@ -30,12 +31,18 @@
"concurrency",
"workers",
"input",
"processors"
"processors",
"handler_profiling",
}


def convert_yaml_to_json(yaml_file_path, output_dir):
print("convert_yaml_to_json yaml_file_path={}, output_dir={}".format(yaml_file_path, output_dir))
with open(yaml_file_path, 'r') as f:
print(
"convert_yaml_to_json yaml_file_path={}, output_dir={}".format(
yaml_file_path, output_dir
)
)
with open(yaml_file_path, "r") as f:
yaml_dict = yaml.safe_load(f)

for model, config in yaml_dict.items():
@@ -58,10 +65,9 @@ def convert_yaml_to_json(yaml_file_path, output_dir):
        batch_worker_list = []
        for batch_size in batch_size_list:
            for workers in workers_list:
                batch_worker_list.append({
                    "batch_size" : batch_size,
                    "workers" : workers
                })
                batch_worker_list.append(
                    {"batch_size": batch_size, "workers": workers}
                )

        benchmark_configs = []
        for batch_worker in batch_worker_list:
@@ -72,25 +78,34 @@ def convert_yaml_to_json(yaml_file_path, output_dir):
        for bConfig in benchmark_configs:
            for i in range(len(processors)):
                if type(processors[i]) is str:
                    path = '{}/{}'.format(output_dir, processors[i])
                    path = "{}/{}".format(output_dir, processors[i])
                    if not os.path.isdir(path):
                        continue

                    benchmark_config_file = '{}/{}_w{}_b{}.json'\
                        .format(path, model_name, bConfig["workers"], bConfig["batch_size"])
                    benchmark_config_file = "{}/{}_w{}_b{}.json".format(
                        path,
                        model_name,
                        bConfig["workers"],
                        bConfig["batch_size"],
                    )
                    with open(benchmark_config_file, "w") as outfile:
                        json.dump(bConfig, outfile, indent=4)
                elif type(processors[i]) is dict:
                    path = '{}/gpu'.format(output_dir)
                    path = "{}/gpu".format(output_dir)
                    if not os.path.isdir(path):
                        continue

                    bConfig["gpus"] = processors[i]["gpus"]
                    benchmark_config_file = '{}/{}_w{}_b{}.json'\
                        .format(path, model_name, bConfig["workers"], bConfig["batch_size"])
                    benchmark_config_file = "{}/{}_w{}_b{}.json".format(
                        path,
                        model_name,
                        bConfig["workers"],
                        bConfig["batch_size"],
                    )
                    with open(benchmark_config_file, "w") as outfile:
                        json.dump(bConfig, outfile, indent=4)
                    del bConfig["gpus"]


if __name__ == "__main__":
    main()
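To make the output naming concrete: each (batch_size, workers) combination yields one JSON file per processor directory. A small sketch under assumed inputs (the directory, model name, and value lists are hypothetical; only the `"{}/{}_w{}_b{}.json"` pattern comes from the script above):

```python
# Hypothetical inputs; only the filename pattern mirrors the script above.
output_dir = "model_json_config"
model_name = "yolov8"
processor = "cpu"
batch_size_list = [1, 4]
workers_list = [2, 4]

batch_worker_list = []
for batch_size in batch_size_list:
    for workers in workers_list:
        batch_worker_list.append({"batch_size": batch_size, "workers": workers})

for bConfig in batch_worker_list:
    print(
        "{}/{}/{}_w{}_b{}.json".format(
            output_dir, processor, model_name, bConfig["workers"], bConfig["batch_size"]
        )
    )
# model_json_config/cpu/yolov8_w2_b1.json ... model_json_config/cpu/yolov8_w4_b4.json
```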
23 changes: 19 additions & 4 deletions docs/FAQs.md
@@ -1,6 +1,7 @@
# FAQ'S
Contents of this document.
* [General](#general)
* [Performance](#performance)
* [Deployment and config](#deployment-and-config)
* [API](#api)
* [Handler](#handler)
@@ -34,9 +35,23 @@ No, As of now only python based models are supported.
TorchServe is derived from Multi-Model-Server. However, TorchServe is specifically tuned for PyTorch models. It also has new features like snapshots and model versioning.

### How to decode international language in inference response on client side?
By default, TorchServe uses utf-8 to encode if the inference response is a string, so the client can use utf-8 to decode.

If a model converts an international language string to bytes, the client needs to use the codec mechanism specified by the model, such as in https://github.com/pytorch/serve/blob/master/examples/nmt_transformer/model_handler_generalized.py
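A minimal client-side sketch of the default case (the model name and input file are hypothetical; the endpoint follows the standard TorchServe inference API on port 8080):

```python
import requests

response = requests.post(
    "http://localhost:8080/predictions/my_model",  # hypothetical model name
    data=open("sample_input.txt", "rb"),
)
print(response.content.decode("utf-8"))  # string responses are utf-8 encoded by default
```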

## Performance

Relevant documents.
- [Performance Guide](performance_guide.md)

### How do I improve TorchServe performance on CPU?
CPU performance is heavily influenced by launcher core pinning. We recommend setting the following properties in your `config.properties`:

```bash
cpu_launcher_enable=true
cpu_launcher_args=--use_logical_core
```
More background on improving CPU performance can be found in this [blog post](https://pytorch.org/tutorials/intermediate/torchserve_with_ipex#grokking-pytorch-intel-cpu-performance-from-first-principles).

## Deployment and config
Relevant documents.
@@ -97,7 +112,7 @@ TorchServe looks for the config.property file according to the order listed in t

- [models](configuration.md): Defines a list of models' configurations in config.properties. A model's configuration can be overridden by the [management API](management_api.md). It does not decide which models will be loaded during TorchServe start. There is no relationship between "models" and "load_models" (i.e., the TorchServe command line option [--models](configuration.md)).

###

## API
Relevant documents
@@ -133,7 +148,7 @@ Refer to [default handlers](default_handlers.md) for more details.

### Is it possible to deploy Hugging Face models?
Yes, you can deploy Hugging Face models using a custom handler.
Refer to [HuggingFace_Transformers](https://github.com/pytorch/serve/blob/master/examples/Huggingface_Transformers/README.md#huggingface-transformers) for an example.
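For illustration, a minimal custom handler along those lines might look like the sketch below. The model type, tokenizer usage, and output format are assumptions; the linked README shows the complete, supported example.

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from ts.torch_handler.base_handler import BaseHandler


class TransformersClassifierHandler(BaseHandler):
    """Hypothetical sequence-classification handler for a Hugging Face model."""

    def initialize(self, context):
        # model_dir contains the files packaged into the .mar archive
        model_dir = context.system_properties.get("model_dir")
        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_dir)
        self.model.eval()
        self.initialized = True

    def preprocess(self, data):
        texts = []
        for row in data:
            text = row.get("data") or row.get("body")
            if isinstance(text, (bytes, bytearray)):
                text = text.decode("utf-8")
            texts.append(text)
        return self.tokenizer(texts, return_tensors="pt", padding=True, truncation=True)

    def inference(self, inputs):
        with torch.no_grad():
            return self.model(**inputs).logits

    def postprocess(self, outputs):
        # One prediction per request in the batch
        return outputs.argmax(dim=-1).tolist()
```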

## Model-archiver
Relevant documents
7 changes: 7 additions & 0 deletions docs/index.rst
@@ -56,6 +56,13 @@ What's going on in TorchServe?
   :link: performance_guide.html
   :tags: Performance,Troubleshooting

.. customcarditem::
   :header: Large Model Inference
   :card_description: Serving Large Models with TorchServe
   :image: https://github.com/raw/pytorch/serve/master/docs/images/ts-lmi-internal.png
   :link: large_model_inference.html
   :tags: Large-Models,Performance

.. customcarditem::
   :header: Troubleshooting
   :card_description: Various updates on TorchServe and use cases.