From bff0e0343218fd61b02074d6f0ccc1d36297ee9c Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Fri, 21 Jul 2023 17:30:10 -0400 Subject: [PATCH 01/37] WIP pipeline benchmark script --- .../benchmark/benchmark_pipeline.py | 474 ++++++++++++++++++ 1 file changed, 474 insertions(+) create mode 100644 src/deepsparse/benchmark/benchmark_pipeline.py diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py new file mode 100644 index 0000000000..c33c546130 --- /dev/null +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -0,0 +1,474 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Benchmarking script ONNX models in a DeepSparse pipeline + +########## +Command help: +usage: deepsparse.benchmark_pipeline [-h] [-b BATCH_SIZE] [-input_config INPUT_CONFIG] + [-ncores NUM_CORES] [-s {async,sync,elastic}] + [-t TIME] [-w WARMUP_TIME] [-nstreams NUM_STREAMS] + [-pin {none,core,numa}] + [-e {deepsparse,onnxruntime}] [-q] + [-x EXPORT_PATH] + model_path + +Benchmark ONNX models in a DeepSparse pipeline + +positional arguments: + model_path Path to an ONNX model file or SparseZoo model stub. + +optional arguments: + -h, --help show this help message and exit. + -b BATCH_SIZE, --batch_size BATCH_SIZE + The batch size to run the analysis for. Must be + greater than 0. + -input_config INPUT_CONFIG + JSON file containing schema for input data. + -ncores NUM_CORES, --num_cores NUM_CORES + The number of physical cores to run the analysis on, + defaults to all physical cores available on the system. + -s {async,sync,elastic}, --scenario {async,sync,elastic} + Choose between using the async, sync and elastic + scenarios. Sync and async are similar to the single- + stream/multi-stream scenarios. Elastic is a newer + scenario that behaves similarly to the async scenario + but uses a different scheduling backend. Default value + is sync. + -t TIME, --time TIME The number of seconds the benchmark will run. Default + is 10 seconds. + -w WARMUP_TIME, --warmup_time WARMUP_TIME + The number of seconds the benchmark will warmup before + running.Default is 2 seconds. + -nstreams NUM_STREAMS, --num_streams NUM_STREAMS + The number of streams that will submit inferences in + parallel using async scenario. Default is + automatically determined for given hardware and may be + sub-optimal. + -pin {none,core,numa}, --thread_pinning {none,core,numa} + Enable binding threads to cores ('core' the default), + threads to cores on sockets ('numa'), or disable + ('none'). + -e {deepsparse,onnxruntime}, --engine {deepsparse,onnxruntime} + Inference engine backend to run eval on. Choices are + 'deepsparse', 'onnxruntime'. Default is 'deepsparse'. + -q, --quiet Lower logging verbosity. + -x EXPORT_PATH, --export_path EXPORT_PATH + Store results into a JSON file. 
+ +########## +Example on a BERT from SparseZoo: +deepsparse.benchmark \ + zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/base-none + +########## +Example on a BERT from SparseZoo with sequence length 512: +deepsparse.benchmark \ + zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/base-none \ + --input_shapes "[1,512],[1,512],[1,512]" + +########## +Example on local ONNX model: +deepsparse.benchmark /PATH/TO/model.onnx + +########## +Example on local ONNX model at batch size 32 with synchronous (singlestream) execution: +deepsparse.benchmark /PATH/TO/model.onnx --batch_size 32 --scenario sync + +""" + +import argparse +import importlib +import json +import logging +import os +from typing import Dict + +from deepsparse import Scheduler, __version__, compile_model +from deepsparse.benchmark.ort_engine import ORTEngine +from deepsparse.benchmark.stream_benchmark import model_stream_benchmark +from deepsparse.cpu import cpu_architecture +from deepsparse.log import set_logging_level +from deepsparse.utils import ( + generate_random_inputs, + model_to_path, + override_onnx_input_shapes, + parse_input_shapes, +) + + +__all__ = ["benchmark_pipelin"] + + +_LOGGER = logging.getLogger(__name__) + +DEEPSPARSE_ENGINE = "deepsparse" +ORT_ENGINE = "onnxruntime" + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Benchmark ONNX models in the DeepSparse Engine" + ) + + parser.add_argument( + "model_path", + type=str, + help="Path to an ONNX model file or SparseZoo model stub", + ) + + parser.add_argument( + "-b", + "--batch_size", + type=int, + default=1, + help="The batch size to run the analysis for. Must be greater than 0", + ) + parser.add_argument( + "-i", + "--input_config", + type=str, + default="config.json", + help="JSON file containing schema for input data" + ) + parser.add_argument( + "-ncores", + "--num_cores", + type=int, + default=cpu_architecture().num_available_physical_cores, + help=( + "The number of physical cores to run the analysis on, " + "defaults to all physical cores available on the system" + ), + ) + parser.add_argument( + "-s", + "--scenario", + type=str, + default="sync", + choices=["async", "sync", "elastic"], + help=( + "Choose between using the async, sync and elastic scenarios. Sync and " + "async are similar to the single-stream/multi-stream scenarios. Elastic " + "is a newer scenario that behaves similarly to the async scenario " + "but uses a different scheduling backend. Default value is sync." + ), + ) + parser.add_argument( + "-t", + "--time", + type=int, + default=10, + help="The number of seconds the benchmark will run. Default is 10 seconds.", + ) + parser.add_argument( + "-w", + "--warmup_time", + type=int, + default=2, + help=( + "The number of seconds the benchmark will warmup before running." + "Default is 2 seconds." + ), + ) + parser.add_argument( + "-nstreams", + "--num_streams", + type=int, + default=None, + help=( + "The number of streams that will submit inferences in parallel using " + "async scenario. Default is automatically determined for given hardware " + "and may be sub-optimal." + ), + ) + parser.add_argument( + "-pin", + "--thread_pinning", + type=str, + default="core", + choices=["none", "core", "numa"], + help=( + "Enable binding threads to cores ('core' the default), " + "threads to cores on sockets ('numa'), or disable ('none')" + ), + ) + parser.add_argument( + "-e", + "--engine", + type=str, + default=DEEPSPARSE_ENGINE, + help=( + "Inference engine backend to run eval on. 
Choices are 'deepsparse', " + "'onnxruntime'. Default is 'deepsparse'. Can also specify a user " + "defined engine class by giving the script and class name in the " + "following format :. This " + "engine class will be dynamically imported during runtime" + ), + ) + parser.add_argument( + "-q", + "--quiet", + help="Lower logging verbosity", + action="store_true", + default=False, + ) + parser.add_argument( + "-x", + "--export_path", + help="Store results into a JSON file", + type=str, + default=None, + ) + + return parser.parse_args() + + +def decide_thread_pinning(pinning_mode: str) -> None: + pinning_mode = pinning_mode.lower() + if pinning_mode in "core": + os.environ["NM_BIND_THREADS_TO_CORES"] = "1" + _LOGGER.info("Thread pinning to cores enabled") + elif pinning_mode in "numa": + os.environ["NM_BIND_THREADS_TO_CORES"] = "0" + os.environ["NM_BIND_THREADS_TO_SOCKETS"] = "1" + _LOGGER.info("Thread pinning to socket/numa nodes enabled") + elif pinning_mode in "none": + os.environ["NM_BIND_THREADS_TO_CORES"] = "0" + os.environ["NM_BIND_THREADS_TO_SOCKETS"] = "0" + _LOGGER.info("Thread pinning disabled, performance may be sub-optimal") + else: + _LOGGER.info( + "Recieved invalid option for thread_pinning '{}', skipping".format( + pinning_mode + ) + ) + +def parse_input_config(input_config: str) -> Dict[str, object]: + return json.loads(input_config) + +def parse_scheduler(scenario: str) -> Scheduler: + scenario = scenario.lower() + if scenario == "multistream": + return Scheduler.multi_stream + elif scenario == "singlestream": + return Scheduler.single_stream + elif scenario == "elastic": + return Scheduler.elastic + else: + return Scheduler.multi_stream + + +def parse_scenario(scenario: str) -> str: + scenario = scenario.lower() + if scenario == "async": + return "multistream" + elif scenario == "sync": + return "singlestream" + elif scenario == "elastic": + return "elastic" + else: + _LOGGER.info( + "Recieved invalid option for scenario'{}', defaulting to async".format( + scenario + ) + ) + return "multistream" + + +def parse_num_streams(num_streams: int, num_cores: int, scenario: str): + # If model.num_streams is set, and the scenario is either "multi_stream" or + # "elastic", use the value of num_streams given to us by the model, otherwise + # use a semi-sane default value. + if scenario == "sync" or scenario == "singlestream": + if num_streams and num_streams > 1: + _LOGGER.info("num_streams reduced to 1 for singlestream scenario.") + return 1 + else: + if num_streams: + return num_streams + else: + default_num_streams = max(1, int(num_cores / 2)) + _LOGGER.info( + "num_streams default value chosen of {}. 
" + "This requires tuning and may be sub-optimal".format( + default_num_streams + ) + ) + return default_num_streams + + +def load_custom_engine(custom_engine_identifier: str): + """ + import a custom engine based off the specified `custom_engine_identifier` + from user specified script + + :param custom_engine_identifier: string in the form of + ': + :return: custom engine class object + """ + path, engine_object_name = custom_engine_identifier.split(":") + spec = importlib.util.spec_from_file_location("user_defined_custom_engine", path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return getattr(module, engine_object_name) + + +def benchmark_pipeline( + model_path: str, + batch_size: int = 1, + input_config: str = "", + num_cores: int = None, + scenario: str = "sync", + time: int = 10, + warmup_time: int = 2, + num_streams: int = None, + thread_pinning: str = "core", + engine: str = DEEPSPARSE_ENGINE, + quiet: bool = False, + export_path: str = None, +) -> Dict: + if quiet: + set_logging_level(logging.WARN) + + if num_cores is None: + num_cores = cpu_architecture().num_available_physical_cores + + decide_thread_pinning(thread_pinning) + + scenario = parse_scenario(scenario.lower()) + scheduler = parse_scheduler(scenario) + input_config = parse_input_config(input_config) + + orig_model_path = model_path + model_path = model_to_path(model_path) + num_streams = parse_num_streams(num_streams, num_cores, scenario) + + # Compile the ONNX into a runnable model + if engine == DEEPSPARSE_ENGINE: + model = compile_model( + model=model_path, + batch_size=batch_size, + num_cores=num_cores, + num_streams=num_streams, + scheduler=scheduler, + input_shapes=input_shapes, + ) + elif engine == ORT_ENGINE: + model = ORTEngine( + model=model_path, + batch_size=batch_size, + num_cores=num_cores, + input_shapes=input_shapes, + ) + elif ":" in engine: + engine = load_custom_engine(custom_engine_identifier=engine) + model = engine( + model_path=model_path, + batch_size=batch_size, + num_cores=num_cores, + ) + else: + raise ValueError(f"Invalid engine choice '{engine}'") + _LOGGER.info(model) + + # Generate random inputs to feed the model + # TODO(mgoin): should be able to query Engine class instead of loading ONNX + if input_shapes: + with override_onnx_input_shapes(model_path, input_shapes) as model_path: + input_list = generate_random_inputs(model_path, batch_size) + elif hasattr(model, "generate_random_inputs"): + input_list = model.generate_random_inputs() + elif hasattr(engine, "generate_random_inputs"): + input_list = engine.generate_random_inputs(batch_size=batch_size) + else: + input_list = generate_random_inputs(model_path, batch_size) + + # Benchmark + _LOGGER.info( + "Starting '{}' performance measurements for {} seconds".format(scenario, time) + ) + benchmark_result = model_stream_benchmark( + model, + input_list, + scenario=scenario, + seconds_to_run=time, + seconds_to_warmup=warmup_time, + num_streams=num_streams, + ) + + export_dict = { + "engine": str(model), + "version": __version__, + "orig_model_path": orig_model_path, + "model_path": model_path, + "batch_size": batch_size, + "input_config": input_config, + "num_cores": num_cores, + "scenario": scenario, + "scheduler": str(model.scheduler), + "seconds_to_run": time, + "num_streams": num_streams, + "benchmark_result": benchmark_result, + "fraction_of_supported_ops": getattr(model, "fraction_of_supported_ops", None), + } + + # Export results + if export_path: + _LOGGER.info("Saving benchmark results to 
JSON file at {}".format(export_path)) + with open(export_path, "w") as out: + json.dump(export_dict, out, indent=2) + + return export_dict + + +def main(): + + args = parse_args() + + result = benchmark_pipeline( + model_path=args.model_path, + batch_size=args.batch_size, + input_config=args.input_config, + num_cores=args.num_cores, + scenario=args.scenario, + time=args.time, + warmup_time=args.warmup_time, + num_streams=args.num_streams, + thread_pinning=args.thread_pinning, + engine=args.engine, + quiet=args.quiet, + export_path=args.export_path, + ) + + # Results summary + print("Original Model Path: {}".format(args.model_path)) + print("Batch Size: {}".format(args.batch_size)) + print("Scenario: {}".format(args.scenario)) + print( + "Throughput (items/sec): {:.4f}".format( + result["benchmark_result"]["items_per_sec"] + ) + ) + print("Latency Mean (ms/batch): {:.4f}".format(result["benchmark_result"]["mean"])) + print( + "Latency Median (ms/batch): {:.4f}".format(result["benchmark_result"]["median"]) + ) + print("Latency Std (ms/batch): {:.4f}".format(result["benchmark_result"]["std"])) + print("Iterations: {}".format(int(result["benchmark_result"]["iterations"]))) + + +if __name__ == "__main__": + main() From e26eaa72f297082a99378527fb8fbfcc2b8dde38 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Mon, 24 Jul 2023 10:14:22 -0400 Subject: [PATCH 02/37] simple script --- src/deepsparse/benchmark/pipeline_sandbox.py | 62 ++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 src/deepsparse/benchmark/pipeline_sandbox.py diff --git a/src/deepsparse/benchmark/pipeline_sandbox.py b/src/deepsparse/benchmark/pipeline_sandbox.py new file mode 100644 index 0000000000..3885c90d44 --- /dev/null +++ b/src/deepsparse/benchmark/pipeline_sandbox.py @@ -0,0 +1,62 @@ +import argparse +import json +import random +import string + +from deepsparse.pipeline import Pipeline + +def parse_args(): + parser = argparse.ArgumentParser( + description="Benchmark DeepSparse Pipelines" + ) + parser.add_argument( + "task_name", + type=str + ) + parser.add_argument( + "model_path", + type=str + ) + parser.add_argument( + "-i", + "--input_type", + type=str, + default="dummy", + choices=["dummy", "real"], + ) + parser.add_argument( + "-c", + "--config", + type=str, + default="config.json", + ) + + return parser.parse_args() + +def main(): + args = parse_args() + + config_file = open(args.config) + config = json.load(config_file) + config_file.close() + + task_name = args.task_name + model_path = args.model_path + + data_length = config['length'] + num_examples = config['num_examples'] + examples = [] + if config['input_data_type'] == "string": + for _ in range(num_examples): + rand_string = ''.join(random.choices(string.printable, k=data_length)) + examples.append(rand_string) + print(examples) + + pipeline = Pipeline.create(task=task_name, model_path=model_path) + output = pipeline(examples) + print(output) + print(pipeline.timer_manger) + print(pipeline.timer_manager.stages) + +if __name__ == "__main__": + main() \ No newline at end of file From 773229639028be571d063879a30ed19a35529945 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Mon, 24 Jul 2023 13:53:24 -0400 Subject: [PATCH 03/37] share code and cleanup --- src/deepsparse/benchmark/benchmark_model.py | 86 +---- .../benchmark/benchmark_pipeline.py | 360 +++--------------- src/deepsparse/benchmark/helpers.py | 78 ++++ src/deepsparse/benchmark/pipeline_sandbox.py | 62 --- 4 files changed, 146 insertions(+), 440 deletions(-) create mode 100644 
src/deepsparse/benchmark/helpers.py delete mode 100644 src/deepsparse/benchmark/pipeline_sandbox.py diff --git a/src/deepsparse/benchmark/benchmark_model.py b/src/deepsparse/benchmark/benchmark_model.py index 0bef7c57ed..ea280cc809 100644 --- a/src/deepsparse/benchmark/benchmark_model.py +++ b/src/deepsparse/benchmark/benchmark_model.py @@ -98,7 +98,7 @@ import os from typing import Dict -from deepsparse import Scheduler, __version__, compile_model +from deepsparse import __version__, compile_model from deepsparse.benchmark.ort_engine import ORTEngine from deepsparse.benchmark.stream_benchmark import model_stream_benchmark from deepsparse.cpu import cpu_architecture @@ -109,6 +109,12 @@ override_onnx_input_shapes, parse_input_shapes, ) +from deepsparse.benchmark.helpers import ( + decide_thread_pinning, + parse_scheduler, + parse_scenario, + parse_num_streams +) __all__ = ["benchmark_model"] @@ -241,78 +247,6 @@ def parse_args(): return parser.parse_args() -def decide_thread_pinning(pinning_mode: str) -> None: - pinning_mode = pinning_mode.lower() - if pinning_mode in "core": - os.environ["NM_BIND_THREADS_TO_CORES"] = "1" - _LOGGER.info("Thread pinning to cores enabled") - elif pinning_mode in "numa": - os.environ["NM_BIND_THREADS_TO_CORES"] = "0" - os.environ["NM_BIND_THREADS_TO_SOCKETS"] = "1" - _LOGGER.info("Thread pinning to socket/numa nodes enabled") - elif pinning_mode in "none": - os.environ["NM_BIND_THREADS_TO_CORES"] = "0" - os.environ["NM_BIND_THREADS_TO_SOCKETS"] = "0" - _LOGGER.info("Thread pinning disabled, performance may be sub-optimal") - else: - _LOGGER.info( - "Recieved invalid option for thread_pinning '{}', skipping".format( - pinning_mode - ) - ) - - -def parse_scheduler(scenario: str) -> Scheduler: - scenario = scenario.lower() - if scenario == "multistream": - return Scheduler.multi_stream - elif scenario == "singlestream": - return Scheduler.single_stream - elif scenario == "elastic": - return Scheduler.elastic - else: - return Scheduler.multi_stream - - -def parse_scenario(scenario: str) -> str: - scenario = scenario.lower() - if scenario == "async": - return "multistream" - elif scenario == "sync": - return "singlestream" - elif scenario == "elastic": - return "elastic" - else: - _LOGGER.info( - "Recieved invalid option for scenario'{}', defaulting to async".format( - scenario - ) - ) - return "multistream" - - -def parse_num_streams(num_streams: int, num_cores: int, scenario: str): - # If model.num_streams is set, and the scenario is either "multi_stream" or - # "elastic", use the value of num_streams given to us by the model, otherwise - # use a semi-sane default value. - if scenario == "sync" or scenario == "singlestream": - if num_streams and num_streams > 1: - _LOGGER.info("num_streams reduced to 1 for singlestream scenario.") - return 1 - else: - if num_streams: - return num_streams - else: - default_num_streams = max(1, int(num_cores / 2)) - _LOGGER.info( - "num_streams default value chosen of {}. 
" - "This requires tuning and may be sub-optimal".format( - default_num_streams - ) - ) - return default_num_streams - - def load_custom_engine(custom_engine_identifier: str): """ import a custom engine based off the specified `custom_engine_identifier` @@ -349,15 +283,15 @@ def benchmark_model( if num_cores is None: num_cores = cpu_architecture().num_available_physical_cores - decide_thread_pinning(thread_pinning) + decide_thread_pinning(thread_pinning, _LOGGER) - scenario = parse_scenario(scenario.lower()) + scenario = parse_scenario(scenario.lower(), _LOGGER) scheduler = parse_scheduler(scenario) input_shapes = parse_input_shapes(input_shapes) orig_model_path = model_path model_path = model_to_path(model_path) - num_streams = parse_num_streams(num_streams, num_cores, scenario) + num_streams = parse_num_streams(num_streams, num_cores, scenario, _LOGGER) # Compile the ONNX into a runnable model if engine == DEEPSPARSE_ENGINE: diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index c33c546130..9b27f3a6e2 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -12,91 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -Benchmarking script ONNX models in a DeepSparse pipeline - -########## -Command help: -usage: deepsparse.benchmark_pipeline [-h] [-b BATCH_SIZE] [-input_config INPUT_CONFIG] - [-ncores NUM_CORES] [-s {async,sync,elastic}] - [-t TIME] [-w WARMUP_TIME] [-nstreams NUM_STREAMS] - [-pin {none,core,numa}] - [-e {deepsparse,onnxruntime}] [-q] - [-x EXPORT_PATH] - model_path - -Benchmark ONNX models in a DeepSparse pipeline - -positional arguments: - model_path Path to an ONNX model file or SparseZoo model stub. - -optional arguments: - -h, --help show this help message and exit. - -b BATCH_SIZE, --batch_size BATCH_SIZE - The batch size to run the analysis for. Must be - greater than 0. - -input_config INPUT_CONFIG - JSON file containing schema for input data. - -ncores NUM_CORES, --num_cores NUM_CORES - The number of physical cores to run the analysis on, - defaults to all physical cores available on the system. - -s {async,sync,elastic}, --scenario {async,sync,elastic} - Choose between using the async, sync and elastic - scenarios. Sync and async are similar to the single- - stream/multi-stream scenarios. Elastic is a newer - scenario that behaves similarly to the async scenario - but uses a different scheduling backend. Default value - is sync. - -t TIME, --time TIME The number of seconds the benchmark will run. Default - is 10 seconds. - -w WARMUP_TIME, --warmup_time WARMUP_TIME - The number of seconds the benchmark will warmup before - running.Default is 2 seconds. - -nstreams NUM_STREAMS, --num_streams NUM_STREAMS - The number of streams that will submit inferences in - parallel using async scenario. Default is - automatically determined for given hardware and may be - sub-optimal. - -pin {none,core,numa}, --thread_pinning {none,core,numa} - Enable binding threads to cores ('core' the default), - threads to cores on sockets ('numa'), or disable - ('none'). - -e {deepsparse,onnxruntime}, --engine {deepsparse,onnxruntime} - Inference engine backend to run eval on. Choices are - 'deepsparse', 'onnxruntime'. Default is 'deepsparse'. - -q, --quiet Lower logging verbosity. - -x EXPORT_PATH, --export_path EXPORT_PATH - Store results into a JSON file. 
- -########## -Example on a BERT from SparseZoo: -deepsparse.benchmark \ - zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/base-none - -########## -Example on a BERT from SparseZoo with sequence length 512: -deepsparse.benchmark \ - zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/base-none \ - --input_shapes "[1,512],[1,512],[1,512]" - -########## -Example on local ONNX model: -deepsparse.benchmark /PATH/TO/model.onnx - -########## -Example on local ONNX model at batch size 32 with synchronous (singlestream) execution: -deepsparse.benchmark /PATH/TO/model.onnx --batch_size 32 --scenario sync - -""" - import argparse import importlib import json +import string import logging +import random import os from typing import Dict -from deepsparse import Scheduler, __version__, compile_model +from deepsparse import Pipeline from deepsparse.benchmark.ort_engine import ORTEngine from deepsparse.benchmark.stream_benchmark import model_stream_benchmark from deepsparse.cpu import cpu_architecture @@ -107,9 +32,15 @@ override_onnx_input_shapes, parse_input_shapes, ) +from deepsparse.benchmark.helpers import ( + decide_thread_pinning, + parse_scheduler, + parse_scenario, + parse_num_streams +) -__all__ = ["benchmark_pipelin"] +__all__ = ["benchmark_pipeline"] _LOGGER = logging.getLogger(__name__) @@ -120,15 +51,33 @@ def parse_args(): parser = argparse.ArgumentParser( - description="Benchmark ONNX models in the DeepSparse Engine" + description="Benchmark DeepSparse Pipelines" + ) + parser.add_argument( + "task_name", + type=str, + help="Type of pipeline to run" ) - parser.add_argument( "model_path", type=str, help="Path to an ONNX model file or SparseZoo model stub", ) - + parser.add_argument( + "-c", + "--input_config", + type=str, + default="config.json", + help="JSON file containing schema for input data" + ) + parser.add_argument( + "-i", + "--input_type", + type=str, + default="dummy", + choices=["dummy", "real"], + help="Type of input data to use, real or randomly generated" + ) parser.add_argument( "-b", "--batch_size", @@ -136,13 +85,6 @@ def parse_args(): default=1, help="The batch size to run the analysis for. Must be greater than 0", ) - parser.add_argument( - "-i", - "--input_config", - type=str, - default="config.json", - help="JSON file containing schema for input data" - ) parser.add_argument( "-ncores", "--num_cores", @@ -173,16 +115,6 @@ def parse_args(): default=10, help="The number of seconds the benchmark will run. Default is 10 seconds.", ) - parser.add_argument( - "-w", - "--warmup_time", - type=int, - default=2, - help=( - "The number of seconds the benchmark will warmup before running." - "Default is 2 seconds." 
- ), - ) parser.add_argument( "-nstreams", "--num_streams", @@ -236,238 +168,62 @@ def parse_args(): return parser.parse_args() -def decide_thread_pinning(pinning_mode: str) -> None: - pinning_mode = pinning_mode.lower() - if pinning_mode in "core": - os.environ["NM_BIND_THREADS_TO_CORES"] = "1" - _LOGGER.info("Thread pinning to cores enabled") - elif pinning_mode in "numa": - os.environ["NM_BIND_THREADS_TO_CORES"] = "0" - os.environ["NM_BIND_THREADS_TO_SOCKETS"] = "1" - _LOGGER.info("Thread pinning to socket/numa nodes enabled") - elif pinning_mode in "none": - os.environ["NM_BIND_THREADS_TO_CORES"] = "0" - os.environ["NM_BIND_THREADS_TO_SOCKETS"] = "0" - _LOGGER.info("Thread pinning disabled, performance may be sub-optimal") - else: - _LOGGER.info( - "Recieved invalid option for thread_pinning '{}', skipping".format( - pinning_mode - ) - ) - -def parse_input_config(input_config: str) -> Dict[str, object]: - return json.loads(input_config) - -def parse_scheduler(scenario: str) -> Scheduler: - scenario = scenario.lower() - if scenario == "multistream": - return Scheduler.multi_stream - elif scenario == "singlestream": - return Scheduler.single_stream - elif scenario == "elastic": - return Scheduler.elastic - else: - return Scheduler.multi_stream - - -def parse_scenario(scenario: str) -> str: - scenario = scenario.lower() - if scenario == "async": - return "multistream" - elif scenario == "sync": - return "singlestream" - elif scenario == "elastic": - return "elastic" - else: - _LOGGER.info( - "Recieved invalid option for scenario'{}', defaulting to async".format( - scenario - ) - ) - return "multistream" - - -def parse_num_streams(num_streams: int, num_cores: int, scenario: str): - # If model.num_streams is set, and the scenario is either "multi_stream" or - # "elastic", use the value of num_streams given to us by the model, otherwise - # use a semi-sane default value. - if scenario == "sync" or scenario == "singlestream": - if num_streams and num_streams > 1: - _LOGGER.info("num_streams reduced to 1 for singlestream scenario.") - return 1 - else: - if num_streams: - return num_streams - else: - default_num_streams = max(1, int(num_cores / 2)) - _LOGGER.info( - "num_streams default value chosen of {}. 
" - "This requires tuning and may be sub-optimal".format( - default_num_streams - ) - ) - return default_num_streams - - -def load_custom_engine(custom_engine_identifier: str): - """ - import a custom engine based off the specified `custom_engine_identifier` - from user specified script - - :param custom_engine_identifier: string in the form of - ': - :return: custom engine class object - """ - path, engine_object_name = custom_engine_identifier.split(":") - spec = importlib.util.spec_from_file_location("user_defined_custom_engine", path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - return getattr(module, engine_object_name) +def parse_input_config(input_config_file: str) -> Dict[str, object]: + config_file = open(input_config_file) + config = json.load(config_file) + config_file.close() + return config def benchmark_pipeline( model_path: str, - batch_size: int = 1, - input_config: str = "", + task: str, + input_config: str, + input_type: str = "dummy", num_cores: int = None, scenario: str = "sync", time: int = 10, - warmup_time: int = 2, num_streams: int = None, thread_pinning: str = "core", engine: str = DEEPSPARSE_ENGINE, quiet: bool = False, export_path: str = None, ) -> Dict: - if quiet: - set_logging_level(logging.WARN) - - if num_cores is None: - num_cores = cpu_architecture().num_available_physical_cores - - decide_thread_pinning(thread_pinning) - - scenario = parse_scenario(scenario.lower()) - scheduler = parse_scheduler(scenario) - input_config = parse_input_config(input_config) - - orig_model_path = model_path - model_path = model_to_path(model_path) - num_streams = parse_num_streams(num_streams, num_cores, scenario) - - # Compile the ONNX into a runnable model - if engine == DEEPSPARSE_ENGINE: - model = compile_model( - model=model_path, - batch_size=batch_size, - num_cores=num_cores, - num_streams=num_streams, - scheduler=scheduler, - input_shapes=input_shapes, - ) - elif engine == ORT_ENGINE: - model = ORTEngine( - model=model_path, - batch_size=batch_size, - num_cores=num_cores, - input_shapes=input_shapes, - ) - elif ":" in engine: - engine = load_custom_engine(custom_engine_identifier=engine) - model = engine( - model_path=model_path, - batch_size=batch_size, - num_cores=num_cores, - ) - else: - raise ValueError(f"Invalid engine choice '{engine}'") - _LOGGER.info(model) - - # Generate random inputs to feed the model - # TODO(mgoin): should be able to query Engine class instead of loading ONNX - if input_shapes: - with override_onnx_input_shapes(model_path, input_shapes) as model_path: - input_list = generate_random_inputs(model_path, batch_size) - elif hasattr(model, "generate_random_inputs"): - input_list = model.generate_random_inputs() - elif hasattr(engine, "generate_random_inputs"): - input_list = engine.generate_random_inputs(batch_size=batch_size) - else: - input_list = generate_random_inputs(model_path, batch_size) - - # Benchmark - _LOGGER.info( - "Starting '{}' performance measurements for {} seconds".format(scenario, time) - ) - benchmark_result = model_stream_benchmark( - model, - input_list, - scenario=scenario, - seconds_to_run=time, - seconds_to_warmup=warmup_time, - num_streams=num_streams, - ) + + config = parse_input_config(input_config) - export_dict = { - "engine": str(model), - "version": __version__, - "orig_model_path": orig_model_path, - "model_path": model_path, - "batch_size": batch_size, - "input_config": input_config, - "num_cores": num_cores, - "scenario": scenario, - "scheduler": str(model.scheduler), - 
"seconds_to_run": time, - "num_streams": num_streams, - "benchmark_result": benchmark_result, - "fraction_of_supported_ops": getattr(model, "fraction_of_supported_ops", None), - } + data_length = config['length'] + num_examples = config['num_examples'] + examples = [] + if config['input_data_type'] == "string": + for _ in range(num_examples): + rand_string = ''.join(random.choices(string.printable, k=data_length)) + examples.append(rand_string) + print(examples) - # Export results - if export_path: - _LOGGER.info("Saving benchmark results to JSON file at {}".format(export_path)) - with open(export_path, "w") as out: - json.dump(export_dict, out, indent=2) + pipeline = Pipeline.create(task=task, model_path=model_path) + output = pipeline(examples) + print(output) - return export_dict + return {} def main(): - args = parse_args() result = benchmark_pipeline( model_path=args.model_path, - batch_size=args.batch_size, - input_config=args.input_config, - num_cores=args.num_cores, - scenario=args.scenario, - time=args.time, - warmup_time=args.warmup_time, - num_streams=args.num_streams, - thread_pinning=args.thread_pinning, - engine=args.engine, - quiet=args.quiet, - export_path=args.export_path, + task=args.task_name, + input_config = args.input_config, + input_type = args.input_type ) # Results summary print("Original Model Path: {}".format(args.model_path)) + print("Task: {}".format(args.task_name)) + print("Input Type: {}".format(args.input_type)) print("Batch Size: {}".format(args.batch_size)) print("Scenario: {}".format(args.scenario)) - print( - "Throughput (items/sec): {:.4f}".format( - result["benchmark_result"]["items_per_sec"] - ) - ) - print("Latency Mean (ms/batch): {:.4f}".format(result["benchmark_result"]["mean"])) - print( - "Latency Median (ms/batch): {:.4f}".format(result["benchmark_result"]["median"]) - ) - print("Latency Std (ms/batch): {:.4f}".format(result["benchmark_result"]["std"])) - print("Iterations: {}".format(int(result["benchmark_result"]["iterations"]))) if __name__ == "__main__": diff --git a/src/deepsparse/benchmark/helpers.py b/src/deepsparse/benchmark/helpers.py new file mode 100644 index 0000000000..14c90653a6 --- /dev/null +++ b/src/deepsparse/benchmark/helpers.py @@ -0,0 +1,78 @@ +import os + +from deepsparse import Scheduler + +__all__ = [ + "decide_thread_pinning", + "parse_scheduler", + "parse_scenario", + "parse_num_streams" +] + +def decide_thread_pinning(pinning_mode: str, logger: object) -> None: + pinning_mode = pinning_mode.lower() + if pinning_mode in "core": + os.environ["NM_BIND_THREADS_TO_CORES"] = "1" + logger.info("Thread pinning to cores enabled") + elif pinning_mode in "numa": + os.environ["NM_BIND_THREADS_TO_CORES"] = "0" + os.environ["NM_BIND_THREADS_TO_SOCKETS"] = "1" + logger.info("Thread pinning to socket/numa nodes enabled") + elif pinning_mode in "none": + os.environ["NM_BIND_THREADS_TO_CORES"] = "0" + os.environ["NM_BIND_THREADS_TO_SOCKETS"] = "0" + logger.info("Thread pinning disabled, performance may be sub-optimal") + else: + logger.info( + "Recieved invalid option for thread_pinning '{}', skipping".format( + pinning_mode + ) + ) + +def parse_scheduler(scenario: str) -> Scheduler: + scenario = scenario.lower() + if scenario == "multistream": + return Scheduler.multi_stream + elif scenario == "singlestream": + return Scheduler.single_stream + elif scenario == "elastic": + return Scheduler.elastic + else: + return Scheduler.multi_stream + +def parse_scenario(scenario: str, logger: object) -> str: + scenario = scenario.lower() + if 
scenario == "async": + return "multistream" + elif scenario == "sync": + return "singlestream" + elif scenario == "elastic": + return "elastic" + else: + logger.info( + "Recieved invalid option for scenario'{}', defaulting to async".format( + scenario + ) + ) + return "multistream" + +def parse_num_streams(num_streams: int, num_cores: int, scenario: str, logger: object): + # If model.num_streams is set, and the scenario is either "multi_stream" or + # "elastic", use the value of num_streams given to us by the model, otherwise + # use a semi-sane default value. + if scenario == "sync" or scenario == "singlestream": + if num_streams and num_streams > 1: + logger.info("num_streams reduced to 1 for singlestream scenario.") + return 1 + else: + if num_streams: + return num_streams + else: + default_num_streams = max(1, int(num_cores / 2)) + logger.info( + "num_streams default value chosen of {}. " + "This requires tuning and may be sub-optimal".format( + default_num_streams + ) + ) + return default_num_streams diff --git a/src/deepsparse/benchmark/pipeline_sandbox.py b/src/deepsparse/benchmark/pipeline_sandbox.py deleted file mode 100644 index 3885c90d44..0000000000 --- a/src/deepsparse/benchmark/pipeline_sandbox.py +++ /dev/null @@ -1,62 +0,0 @@ -import argparse -import json -import random -import string - -from deepsparse.pipeline import Pipeline - -def parse_args(): - parser = argparse.ArgumentParser( - description="Benchmark DeepSparse Pipelines" - ) - parser.add_argument( - "task_name", - type=str - ) - parser.add_argument( - "model_path", - type=str - ) - parser.add_argument( - "-i", - "--input_type", - type=str, - default="dummy", - choices=["dummy", "real"], - ) - parser.add_argument( - "-c", - "--config", - type=str, - default="config.json", - ) - - return parser.parse_args() - -def main(): - args = parse_args() - - config_file = open(args.config) - config = json.load(config_file) - config_file.close() - - task_name = args.task_name - model_path = args.model_path - - data_length = config['length'] - num_examples = config['num_examples'] - examples = [] - if config['input_data_type'] == "string": - for _ in range(num_examples): - rand_string = ''.join(random.choices(string.printable, k=data_length)) - examples.append(rand_string) - print(examples) - - pipeline = Pipeline.create(task=task_name, model_path=model_path) - output = pipeline(examples) - print(output) - print(pipeline.timer_manger) - print(pipeline.timer_manager.stages) - -if __name__ == "__main__": - main() \ No newline at end of file From 956dbe8140e9c49fe08f3419798be2f9d7997e6b Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Tue, 25 Jul 2023 09:05:04 -0400 Subject: [PATCH 04/37] adding additional cmd line arguments --- .../benchmark/benchmark_pipeline.py | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 9b27f3a6e2..0fcc98ddd0 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -21,6 +21,7 @@ import os from typing import Dict +from deepsparse import __version__, compile_model from deepsparse import Pipeline from deepsparse.benchmark.ort_engine import ORTEngine from deepsparse.benchmark.stream_benchmark import model_stream_benchmark @@ -180,6 +181,7 @@ def benchmark_pipeline( task: str, input_config: str, input_type: str = "dummy", + batch_size: int = 1, num_cores: int = None, scenario: str = "sync", time: int = 10, @@ -190,6 
+192,36 @@ def benchmark_pipeline( export_path: str = None, ) -> Dict: + if quiet: + set_logging_level(logging.WARN) + + if num_cores is None: + num_cores = cpu_architecture().num_available_physical_cores + + decide_thread_pinning(thread_pinning, _LOGGER) + scenario = parse_scenario(scenario.lower(), _LOGGER) + scheduler = parse_scheduler(scenario) + num_streams = parse_num_streams(num_streams, num_cores, scenario, _LOGGER) + + # Compile the ONNX into a runnable model + if engine == DEEPSPARSE_ENGINE: + model = compile_model( + model=model_path, + batch_size=batch_size, + num_cores=num_cores, + num_streams=num_streams, + scheduler=scheduler, + ) + elif engine == ORT_ENGINE: + model = ORTEngine( + model=model_path, + batch_size=batch_size, + num_cores=num_cores, + ) + else: + raise ValueError(f"Invalid engine choice '{engine}'") + _LOGGER.info(model) + config = parse_input_config(input_config) data_length = config['length'] @@ -215,7 +247,16 @@ def main(): model_path=args.model_path, task=args.task_name, input_config = args.input_config, - input_type = args.input_type + input_type = args.input_type, + batch_size=args.batch_size, + num_cores=args.num_cores, + scenario=args.scenario, + time=args.time, + num_streams=args.num_streams, + thread_pinning=args.thread_pinning, + engine=args.engine, + quiet=args.quiet, + export_path=args.export_path, ) # Results summary From 6cbc99eb4b14e5e1ddefeaccfe5ae2a91fd1088a Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Tue, 25 Jul 2023 12:13:18 -0400 Subject: [PATCH 05/37] image and text inputs --- .../benchmark/benchmark_pipeline.py | 75 ++++++++++++++----- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 0fcc98ddd0..a56cc22486 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -19,7 +19,9 @@ import logging import random import os -from typing import Dict +from typing import Dict, List +import time +import numpy from deepsparse import __version__, compile_model from deepsparse import Pipeline @@ -27,6 +29,7 @@ from deepsparse.benchmark.stream_benchmark import model_stream_benchmark from deepsparse.cpu import cpu_architecture from deepsparse.log import set_logging_level +from deepsparse.utils.timer import StagedTimer from deepsparse.utils import ( generate_random_inputs, model_to_path, @@ -184,13 +187,13 @@ def benchmark_pipeline( batch_size: int = 1, num_cores: int = None, scenario: str = "sync", - time: int = 10, + seconds_to_run: int = 10, num_streams: int = None, thread_pinning: str = "core", engine: str = DEEPSPARSE_ENGINE, quiet: bool = False, export_path: str = None, -) -> Dict: +) -> List[StagedTimer]: if quiet: set_logging_level(logging.WARN) @@ -223,26 +226,43 @@ def benchmark_pipeline( _LOGGER.info(model) config = parse_input_config(input_config) + pipeline = Pipeline.create(task=task, model_path=model_path) - data_length = config['length'] - num_examples = config['num_examples'] - examples = [] + input_data = [] if config['input_data_type'] == "string": - for _ in range(num_examples): + data_length = config['sequence_length'] + for _ in range(batch_size): rand_string = ''.join(random.choices(string.printable, k=data_length)) - examples.append(rand_string) - print(examples) + input_data.append(rand_string) + inputs = pipeline.input_schema(sequences=input_data) + elif config['input_data_type'] == "array": + image_shape = config["input_array_shape"] + dtype = 
config["input_array_dtype"] + for _ in range(batch_size): + if dtype == "uint8": + rand_array = numpy.random.randint(0,high=255, size=image_shape).astype(dtype) + rand_array = numpy.random.rand(*image_shape).astype(dtype) + input_data.append(rand_array) + inputs = pipeline.input_schema(images=input_data) - pipeline = Pipeline.create(task=task, model_path=model_path) - output = pipeline(examples) - print(output) + benchmark_end_time = time.perf_counter() + seconds_to_run + batch_timings = [] + while time.perf_counter() < benchmark_end_time: + output = pipeline(inputs) + batch_timings.append(pipeline.timer_manager.latest) - return {} + return batch_timings def main(): args = parse_args() + print("Original Model Path: {}".format(args.model_path)) + print("Task: {}".format(args.task_name)) + print("Input Type: {}".format(args.input_type)) + print("Batch Size: {}".format(args.batch_size)) + print("Scenario: {}".format(args.scenario)) + result = benchmark_pipeline( model_path=args.model_path, task=args.task_name, @@ -251,7 +271,7 @@ def main(): batch_size=args.batch_size, num_cores=args.num_cores, scenario=args.scenario, - time=args.time, + seconds_to_run=args.time, num_streams=args.num_streams, thread_pinning=args.thread_pinning, engine=args.engine, @@ -260,11 +280,28 @@ def main(): ) # Results summary - print("Original Model Path: {}".format(args.model_path)) - print("Task: {}".format(args.task_name)) - print("Input Type: {}".format(args.input_type)) - print("Batch Size: {}".format(args.batch_size)) - print("Scenario: {}".format(args.scenario)) + batches_processed = len(result) + total_time = sum(st.times['total_inference'] for st in result) + print("Processed {} batches in {} seconds".format(batches_processed, total_time)) + throughput = round(batches_processed / total_time, 4) + print("Throughput: {} batches/sec".format(throughput)) + total_pre_process = sum(st.times['pre_process'] for st in result) + total_post_process = sum(st.times['post_process'] for st in result) + total_engine_forward = sum(st.times['engine_forward'] for st in result) + + avg_pre_process = round(total_pre_process / batches_processed * 1000, 4) + avg_post_process = round(total_post_process / batches_processed * 1000, 4) + avg_engine_forward = round(total_engine_forward / batches_processed * 1000, 4) + + print("Average Pre-Process: {} ms".format(avg_pre_process)) + print("Average Post-Process: {} ms".format(avg_post_process)) + print("Average Engine Forward: {} ms".format(avg_engine_forward)) + + total_time = total_pre_process + total_post_process + total_engine_forward + percent_pre = round(total_pre_process / total_time * 100, 2) + percent_post = round(total_post_process / total_time * 100, 2) + percent_forward = round(total_engine_forward / total_time * 100, 2) + print("{}% Pre-processing, {}% Post-processing, {}% Inference".format(percent_pre, percent_post, percent_forward)) if __name__ == "__main__": From 0143d318185a7588d6dd8c2e0b9eb8629570ada9 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Tue, 25 Jul 2023 14:33:07 -0400 Subject: [PATCH 06/37] json export of statistics --- .../benchmark/benchmark_pipeline.py | 199 +++++++++++------- 1 file changed, 124 insertions(+), 75 deletions(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index a56cc22486..5440724066 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -13,29 +13,21 @@ # limitations under the License. 
import argparse -import importlib import json import string import logging import random -import os -from typing import Dict, List +from typing import Dict, List, Tuple import time import numpy +import threading +import queue -from deepsparse import __version__, compile_model +from deepsparse import __version__ from deepsparse import Pipeline -from deepsparse.benchmark.ort_engine import ORTEngine -from deepsparse.benchmark.stream_benchmark import model_stream_benchmark from deepsparse.cpu import cpu_architecture from deepsparse.log import set_logging_level from deepsparse.utils.timer import StagedTimer -from deepsparse.utils import ( - generate_random_inputs, - model_to_path, - override_onnx_input_shapes, - parse_input_shapes, -) from deepsparse.benchmark.helpers import ( decide_thread_pinning, parse_scheduler, @@ -141,19 +133,6 @@ def parse_args(): "threads to cores on sockets ('numa'), or disable ('none')" ), ) - parser.add_argument( - "-e", - "--engine", - type=str, - default=DEEPSPARSE_ENGINE, - help=( - "Inference engine backend to run eval on. Choices are 'deepsparse', " - "'onnxruntime'. Default is 'deepsparse'. Can also specify a user " - "defined engine class by giving the script and class name in the " - "following format :. This " - "engine class will be dynamically imported during runtime" - ), - ) parser.add_argument( "-q", "--quiet", @@ -171,9 +150,63 @@ def parse_args(): return parser.parse_args() +class PipelineExecutorThread(threading.Thread): + def __init__( + self, + pipeline: Pipeline, + inputs: List[any], + time_queue: queue.Queue, + max_time: float + ): + super(PipelineExecutorThread, self).__init__() + self._pipeline = pipeline + self._inputs = inputs + self._time_queue = time_queue + self._max_time = max_time + + def run(self): + while time.perf_counter() < self._max_time: + output = self._pipeline(self._inputs) + self._time_queue.put(self._pipeline.timer_manager.latest) + + +def singlestream_benchmark( + pipeline: Pipeline, + inputs: List[any], + seconds_to_run: float +) -> List[StagedTimer]: + benchmark_end_time = time.perf_counter() + seconds_to_run + batch_timings = [] + while time.perf_counter() < benchmark_end_time: + output = pipeline(inputs) + batch_timings.append(pipeline.timer_manager.latest) + + return batch_timings + +def multistream_benchmark( + pipeline: Pipeline, + inputs: List[any], + seconds_to_run: float, + num_streams: int, +) -> List[StagedTimer]: + time_queue = queue.Queue() + max_time = time.perf_counter() + seconds_to_run + threads = [] + + # Sara TODO: should these all be sharing the same pipeline? 
+ for thread in range(num_streams): + threads.append(PipelineExecutorThread(pipeline, inputs, time_queue, max_time)) + for thread in threads: + thread.start() -def parse_input_config(input_config_file: str) -> Dict[str, object]: + for thread in threads: + thread.join() + + return list(time_queue.queue) + + +def parse_input_config(input_config_file: str) -> Dict[str, any]: config_file = open(input_config_file) config = json.load(config_file) config_file.close() @@ -190,10 +223,9 @@ def benchmark_pipeline( seconds_to_run: int = 10, num_streams: int = None, thread_pinning: str = "core", - engine: str = DEEPSPARSE_ENGINE, quiet: bool = False, export_path: str = None, -) -> List[StagedTimer]: +) -> Tuple[List[StagedTimer],float] : if quiet: set_logging_level(logging.WARN) @@ -205,25 +237,6 @@ def benchmark_pipeline( scenario = parse_scenario(scenario.lower(), _LOGGER) scheduler = parse_scheduler(scenario) num_streams = parse_num_streams(num_streams, num_cores, scenario, _LOGGER) - - # Compile the ONNX into a runnable model - if engine == DEEPSPARSE_ENGINE: - model = compile_model( - model=model_path, - batch_size=batch_size, - num_cores=num_cores, - num_streams=num_streams, - scheduler=scheduler, - ) - elif engine == ORT_ENGINE: - model = ORTEngine( - model=model_path, - batch_size=batch_size, - num_cores=num_cores, - ) - else: - raise ValueError(f"Invalid engine choice '{engine}'") - _LOGGER.info(model) config = parse_input_config(input_config) pipeline = Pipeline.create(task=task, model_path=model_path) @@ -245,14 +258,39 @@ def benchmark_pipeline( input_data.append(rand_array) inputs = pipeline.input_schema(images=input_data) - benchmark_end_time = time.perf_counter() + seconds_to_run - batch_timings = [] - while time.perf_counter() < benchmark_end_time: - output = pipeline(inputs) - batch_timings.append(pipeline.timer_manager.latest) + start_time = time.perf_counter() + if scenario == "singlestream": + batch_times = singlestream_benchmark(pipeline, inputs, seconds_to_run) + elif scenario == "multistream": + batch_times = multistream_benchmark(pipeline, inputs, seconds_to_run, num_streams) + elif scenario == "elastic": + batch_times = multistream_benchmark(pipeline, inputs, seconds_to_run, num_streams) + else: + raise Exception(f"Unknown scenario '{scenario}'") - return batch_timings + if len(batch_times) == 0: + raise Exception( + "Generated no batch timings, try extending benchmark time with '--time'" + ) + end_time = time.perf_counter() + total_run_time = end_time - start_time + + return batch_times, total_run_time + +def calculate_statistics(batch_times_ms: List[float]) -> Dict: + percentiles = [25.0, 50.0, 75.0, 90.0, 95.0, 99.0, 99.9] + buckets = numpy.percentile(batch_times_ms, percentiles).tolist() + percentiles_dict = { + "{:2.1f}%".format(key): value for key, value in zip(percentiles, buckets) + } + benchmark_dict = { + "median": numpy.median(batch_times_ms), + "mean": numpy.mean(batch_times_ms), + "std": numpy.std(batch_times_ms), + **percentiles_dict, + } + return benchmark_dict def main(): args = parse_args() @@ -263,7 +301,7 @@ def main(): print("Batch Size: {}".format(args.batch_size)) print("Scenario: {}".format(args.scenario)) - result = benchmark_pipeline( + batch_times, total_run_time = benchmark_pipeline( model_path=args.model_path, task=args.task_name, input_config = args.input_config, @@ -274,35 +312,46 @@ def main(): seconds_to_run=args.time, num_streams=args.num_streams, thread_pinning=args.thread_pinning, - engine=args.engine, quiet=args.quiet, 
export_path=args.export_path, ) - # Results summary - batches_processed = len(result) - total_time = sum(st.times['total_inference'] for st in result) - print("Processed {} batches in {} seconds".format(batches_processed, total_time)) - throughput = round(batches_processed / total_time, 4) - print("Throughput: {} batches/sec".format(throughput)) - total_pre_process = sum(st.times['pre_process'] for st in result) - total_post_process = sum(st.times['post_process'] for st in result) - total_engine_forward = sum(st.times['engine_forward'] for st in result) + pre_process_times = [st.times['pre_process'] * 1000 for st in batch_times] + pre_stats = calculate_statistics(pre_process_times) + post_process_times = [st.times['post_process'] * 1000 for st in batch_times] + post_stats = calculate_statistics(post_process_times) + engine_forward_times = [st.times['engine_forward'] * 1000 for st in batch_times] + forward_stats = calculate_statistics(engine_forward_times) - avg_pre_process = round(total_pre_process / batches_processed * 1000, 4) - avg_post_process = round(total_post_process / batches_processed * 1000, 4) - avg_engine_forward = round(total_engine_forward / batches_processed * 1000, 4) - - print("Average Pre-Process: {} ms".format(avg_pre_process)) - print("Average Post-Process: {} ms".format(avg_post_process)) - print("Average Engine Forward: {} ms".format(avg_engine_forward)) + items_per_sec = (len(batch_times) * args.batch_size) / total_run_time + total_pre_process = sum(pre_process_times) + total_post_process = sum(post_process_times) + total_engine_forward = sum(engine_forward_times) total_time = total_pre_process + total_post_process + total_engine_forward - percent_pre = round(total_pre_process / total_time * 100, 2) - percent_post = round(total_post_process / total_time * 100, 2) - percent_forward = round(total_engine_forward / total_time * 100, 2) - print("{}% Pre-processing, {}% Post-processing, {}% Inference".format(percent_pre, percent_post, percent_forward)) + percent_pre = total_pre_process / total_time * 100 + percent_post = total_post_process / total_time * 100 + percent_forward = total_engine_forward / total_time * 100 + + export_dict = { + "scenario": args.scenario, + "items_per_sec": items_per_sec, + "seconds_ran": total_run_time, + "iterations": len(batch_times), + "percent_pre": percent_pre, + "percent_post": percent_post, + "percent_forward": percent_forward, + "pre_stats": pre_stats, + "post_stats": post_stats, + "forward_stats": forward_stats + } + # Export results + export_path = args.export_path + if export_path: + _LOGGER.info("Saving benchmark results to JSON file at {}".format(export_path)) + with open(export_path, "w") as out: + json.dump(export_dict, out, indent=2) if __name__ == "__main__": main() From 58edf0580d6191ef28fecebb7ae5140bc73c3f32 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Tue, 25 Jul 2023 14:56:42 -0400 Subject: [PATCH 07/37] clean up printed output --- .../benchmark/benchmark_pipeline.py | 60 ++++++++++++------- 1 file changed, 40 insertions(+), 20 deletions(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 5440724066..2a630d914f 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -216,7 +216,6 @@ def benchmark_pipeline( model_path: str, task: str, input_config: str, - input_type: str = "dummy", batch_size: int = 1, num_cores: int = None, scenario: str = "sync", @@ -224,7 +223,6 @@ def benchmark_pipeline( 
num_streams: int = None, thread_pinning: str = "core", quiet: bool = False, - export_path: str = None, ) -> Tuple[List[StagedTimer],float] : if quiet: @@ -235,28 +233,34 @@ def benchmark_pipeline( decide_thread_pinning(thread_pinning, _LOGGER) scenario = parse_scenario(scenario.lower(), _LOGGER) - scheduler = parse_scheduler(scenario) num_streams = parse_num_streams(num_streams, num_cores, scenario, _LOGGER) config = parse_input_config(input_config) + input_type = config["data_type"] pipeline = Pipeline.create(task=task, model_path=model_path) input_data = [] - if config['input_data_type'] == "string": - data_length = config['sequence_length'] - for _ in range(batch_size): - rand_string = ''.join(random.choices(string.printable, k=data_length)) - input_data.append(rand_string) - inputs = pipeline.input_schema(sequences=input_data) - elif config['input_data_type'] == "array": - image_shape = config["input_array_shape"] - dtype = config["input_array_dtype"] - for _ in range(batch_size): - if dtype == "uint8": - rand_array = numpy.random.randint(0,high=255, size=image_shape).astype(dtype) - rand_array = numpy.random.rand(*image_shape).astype(dtype) - input_data.append(rand_array) - inputs = pipeline.input_schema(images=input_data) + if input_type == "dummy": + if config['input_data_type'] == "string": + data_length = config['sequence_length'] + for _ in range(batch_size): + rand_string = ''.join(random.choices(string.printable, k=data_length)) + input_data.append(rand_string) + inputs = pipeline.input_schema(sequences=input_data) + elif config['input_data_type'] == "array": + image_shape = config["input_array_shape"] + dtype = config["input_array_dtype"] + for _ in range(batch_size): + if dtype == "uint8": + rand_array = numpy.random.randint(0,high=255, size=image_shape).astype(dtype) + rand_array = numpy.random.rand(*image_shape).astype(dtype) + input_data.append(rand_array) + inputs = pipeline.input_schema(images=input_data) + elif input_type == "real": + raise Exception("Real input type not yet implemented") + else: + raise Exception(f"Unknown input type '{input_type}'") + start_time = time.perf_counter() if scenario == "singlestream": @@ -305,7 +309,6 @@ def main(): model_path=args.model_path, task=args.task_name, input_config = args.input_config, - input_type = args.input_type, batch_size=args.batch_size, num_cores=args.num_cores, scenario=args.scenario, @@ -313,7 +316,6 @@ def main(): num_streams=args.num_streams, thread_pinning=args.thread_pinning, quiet=args.quiet, - export_path=args.export_path, ) pre_process_times = [st.times['pre_process'] * 1000 for st in batch_times] @@ -353,5 +355,23 @@ def main(): with open(export_path, "w") as out: json.dump(export_dict, out, indent=2) + # Results summary + print("Original Model Path: {}".format(args.model_path)) + print("Batch Size: {}".format(args.batch_size)) + print("Scenario: {}".format(args.scenario)) + print("Iterations: {}".format(int(export_dict["iterations"]))) + print( + "Throughput (items/sec): {:.4f}".format( + export_dict["items_per_sec"] + ) + ) + print("Processing Time Breakdown: ") + print(" Pre-Processing: {:.2f}%".format(export_dict["percent_pre"])) + print(" Post-Processing: {:.2f}%".format(export_dict["percent_post"])) + print(" Forward Pass: {:.2f}%".format(export_dict["percent_forward"])) + print("Pre-Processing Latency Mean (ms/batch): {:.4f}".format(export_dict["pre_stats"]["mean"])) + print("Post-Processing Latency Mean (ms/batch): {:.4f}".format(export_dict["post_stats"]["mean"])) + print("Forward Pass Latency Mean 
(ms/batch): {:.4f}".format(export_dict["forward_stats"]["mean"])) + if __name__ == "__main__": main() From 75bda3ac6961aa9d2da2a97f337d8bd8d67d3f2d Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Tue, 25 Jul 2023 17:00:59 -0400 Subject: [PATCH 08/37] adding support for real data --- .../benchmark/benchmark_pipeline.py | 112 ++++++++++++++---- 1 file changed, 92 insertions(+), 20 deletions(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 2a630d914f..16904f2d56 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -22,6 +22,7 @@ import numpy import threading import queue +import glob from deepsparse import __version__ from deepsparse import Pipeline @@ -193,7 +194,6 @@ def multistream_benchmark( max_time = time.perf_counter() + seconds_to_run threads = [] - # Sara TODO: should these all be sharing the same pipeline? for thread in range(num_streams): threads.append(PipelineExecutorThread(pipeline, inputs, time_queue, max_time)) @@ -212,6 +212,84 @@ def parse_input_config(input_config_file: str) -> Dict[str, any]: config_file.close() return config +def get_input_schema_type(pipeline: Pipeline) -> str: + input_schema_requirements = list(pipeline.input_schema.__annotations__.keys()) + image_requirements = ["images"] + text_requirements = ["sequences", "text"] + + if len(input_schema_requirements) == 1: + requirement = input_schema_requirements[0] + if requirement in image_requirements: + return "image" + elif requirement in text_requirements: + return "text" + + raise Exception("Unknown schema requirement {}".format(input_schema_requirements)) + +def generate_image_data(config: Dict, batch_size: int) -> List[numpy.ndarray]: + input_data = [] + if "input_image_shape" in config and len(config["input_image_shape"]) == 3: + image_shape = config["input_image_shape"] + else: + image_shape = (240, 240, 3) + _LOGGER.warning("Using default image shape {}".format(image_shape)) + + for _ in range(batch_size): + rand_array = numpy.random.randint(0,high=255, size=image_shape).astype(numpy.uint8) + input_data.append(rand_array) + + return input_data + +def load_image_data(config: Dict, batch_size: int) -> List[str]: + path_to_data = config["data_folder"] + recursive_search = config["recursive_search"] + files = [] + for f in glob.glob(path_to_data + "/**", recursive=recursive_search): + if f.lower().endswith(".jpeg"): + files.append(f) + if len(files) < batch_size: + raise Exception("Not enough images found in {}".format(path_to_data)) + input_data = random.sample(files, batch_size) + + return input_data + +def generate_text_data(config: Dict, batch_size: int) -> List[str]: + input_data = [] + if 'sequence_length' in config: + string_length = config['sequence_length'] + else: + string_length = 100 + _LOGGER.warning("Using default string length {}".format(string_length)) + for _ in range(batch_size): + rand_string = ''.join(random.choices(string.printable, k=string_length)) + input_data.append(rand_string) + + return input_data + +def load_text_data(config: Dict, batch_size: int) -> List[str]: + path_to_data = config["data_folder"] + recursive_search = config["recursive_search"] + files = [] + for f in glob.glob(path_to_data + "/**", recursive=recursive_search): + if f.lower().endswith(".txt"): + files.append(f) + if len(files) < batch_size: + raise Exception("Not enough images found in {}".format(path_to_data)) + input_files = random.sample(files, batch_size) + if 
"max_string_length" in config: + max_string_length = config["max_string_length"] + else: + max_string_length = -1 + _LOGGER.warning("Using default max string length {}".format(max_string_length)) + input_data = [] + for f_path in input_files: + f = open(f_path) + text_data = f.read() + f.close() + input_data.append(text_data[:max_string_length]) + print(input_data) + return input_data + def benchmark_pipeline( model_path: str, task: str, @@ -238,26 +316,22 @@ def benchmark_pipeline( config = parse_input_config(input_config) input_type = config["data_type"] pipeline = Pipeline.create(task=task, model_path=model_path) + input_schema_requirement = get_input_schema_type(pipeline) - input_data = [] if input_type == "dummy": - if config['input_data_type'] == "string": - data_length = config['sequence_length'] - for _ in range(batch_size): - rand_string = ''.join(random.choices(string.printable, k=data_length)) - input_data.append(rand_string) - inputs = pipeline.input_schema(sequences=input_data) - elif config['input_data_type'] == "array": - image_shape = config["input_array_shape"] - dtype = config["input_array_dtype"] - for _ in range(batch_size): - if dtype == "uint8": - rand_array = numpy.random.randint(0,high=255, size=image_shape).astype(dtype) - rand_array = numpy.random.rand(*image_shape).astype(dtype) - input_data.append(rand_array) + if input_schema_requirement == "image": + input_data = generate_image_data(config, batch_size) inputs = pipeline.input_schema(images=input_data) + elif input_schema_requirement == "text": + input_data = generate_text_data(config, batch_size) + inputs = pipeline.input_schema(sequences=input_data) elif input_type == "real": - raise Exception("Real input type not yet implemented") + if input_schema_requirement == "image": + input_data = load_image_data(config, batch_size) + inputs = pipeline.input_schema(images=input_data) + elif input_schema_requirement == "text": + input_data = load_text_data(config, batch_size) + inputs = pipeline.input_schema(sequences=input_data) else: raise Exception(f"Unknown input type '{input_type}'") @@ -273,9 +347,7 @@ def benchmark_pipeline( raise Exception(f"Unknown scenario '{scenario}'") if len(batch_times) == 0: - raise Exception( - "Generated no batch timings, try extending benchmark time with '--time'" - ) + raise Exception("Generated no batch timings, try extending benchmark time with '--time'") end_time = time.perf_counter() total_run_time = end_time - start_time From b751e750189cde1ea365065d9ad5c1a16a64489a Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Thu, 27 Jul 2023 12:33:05 -0400 Subject: [PATCH 09/37] support for additional pipelines --- .../benchmark/benchmark_pipeline.py | 134 ++++++++++++------ 1 file changed, 90 insertions(+), 44 deletions(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 16904f2d56..ac2ec44607 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -215,15 +215,19 @@ def parse_input_config(input_config_file: str) -> Dict[str, any]: def get_input_schema_type(pipeline: Pipeline) -> str: input_schema_requirements = list(pipeline.input_schema.__annotations__.keys()) image_requirements = ["images"] - text_requirements = ["sequences", "text"] - - if len(input_schema_requirements) == 1: - requirement = input_schema_requirements[0] - if requirement in image_requirements: - return "image" - elif requirement in text_requirements: - return "text" - + basic_text_requirements = 
["sequences"] + question_requirements = ["question", "context", "id"] + text_generation_requirements = ["sequences", "return_logits", "session_id", "fixed_sequences_length"] + + if input_schema_requirements == image_requirements or "YOLO" in pipeline.input_schema.__name__: + return "image" + elif input_schema_requirements == basic_text_requirements: + return "text" + elif input_schema_requirements == question_requirements: + return "question" + elif input_schema_requirements == text_generation_requirements: + return "text_generation" + raise Exception("Unknown schema requirement {}".format(input_schema_requirements)) def generate_image_data(config: Dict, batch_size: int) -> List[numpy.ndarray]: @@ -255,14 +259,14 @@ def load_image_data(config: Dict, batch_size: int) -> List[str]: def generate_text_data(config: Dict, batch_size: int) -> List[str]: input_data = [] - if 'sequence_length' in config: - string_length = config['sequence_length'] + if 'gen_sequence_length' in config: + string_length = config['gen_sequence_length'] else: string_length = 100 _LOGGER.warning("Using default string length {}".format(string_length)) for _ in range(batch_size): - rand_string = ''.join(random.choices(string.printable, k=string_length)) - input_data.append(rand_string) + rand_sentence = generate_sentence(string_length) + input_data.append(rand_sentence) return input_data @@ -287,9 +291,38 @@ def load_text_data(config: Dict, batch_size: int) -> List[str]: text_data = f.read() f.close() input_data.append(text_data[:max_string_length]) - print(input_data) return input_data +def generate_sentence(string_length: int, avg_word_length: int = 5): + random_chars = ''.join(random.choices(string.ascii_letters, k=string_length)) + space_locations = random.sample(range(string_length), int(string_length / avg_word_length)) + random_chars = list(random_chars) + for loc in space_locations: + random_chars[loc] = ' ' + return ''.join(random_chars) + +def generate_question_data(config: Dict) -> Tuple[str, str]: + if 'gen_sequence_length' in config: + string_length = config['gen_sequence_length'] + else: + string_length = 100 + _LOGGER.warning("Using default string length {}".format(string_length)) + question = generate_sentence(string_length) + context = generate_sentence(string_length) + return (question, context) + +def load_question_data(config: Dict) -> Tuple[str, str]: + path_to_questions = config["question_file"] + path_to_context = config["context_file"] + + f_question = open(path_to_questions) + f_context = open(path_to_context) + question = f_question.read() + context = f_context.read() + f_question.close() + f_context.close() + return question, context + def benchmark_pipeline( model_path: str, task: str, @@ -325,6 +358,14 @@ def benchmark_pipeline( elif input_schema_requirement == "text": input_data = generate_text_data(config, batch_size) inputs = pipeline.input_schema(sequences=input_data) + elif input_schema_requirement == "question": + _LOGGER.warn("Only batch size of 1 supported for Question Answering Pipeline") + question, context = generate_question_data(config) + inputs = pipeline.input_schema(question=question, context=context) + elif input_schema_requirement == "text_generation": + seqs = generate_text_data(config, batch_size) + fix_len = config["fix_sequence_length"] + inputs = pipeline.input_schema(sequences=seqs, return_logits=False, session_id=None, fixed_sequences_length=fix_len) elif input_type == "real": if input_schema_requirement == "image": input_data = load_image_data(config, batch_size) @@ 
-332,6 +373,14 @@ def benchmark_pipeline( elif input_schema_requirement == "text": input_data = load_text_data(config, batch_size) inputs = pipeline.input_schema(sequences=input_data) + elif input_schema_requirement == "question": + _LOGGER.warn("Only batch size of 1 supported for Question Answering Pipeline") + question, context = load_question_data(config) + inputs = pipeline.input_schema(question=question, context=context) + elif input_schema_requirement == "text_generation": + seqs = load_text_data(config, batch_size) + fix_len = config["fix_sequence_length"] + inputs = pipeline.input_schema(sequences=seqs, return_logits=False, session_id=None, fixed_sequences_length=fix_len) else: raise Exception(f"Unknown input type '{input_type}'") @@ -346,21 +395,22 @@ def benchmark_pipeline( else: raise Exception(f"Unknown scenario '{scenario}'") - if len(batch_times) == 0: - raise Exception("Generated no batch timings, try extending benchmark time with '--time'") end_time = time.perf_counter() total_run_time = end_time - start_time + if len(batch_times) == 0: + raise Exception("Generated no batch timings, try extending benchmark time with '--time'") return batch_times, total_run_time -def calculate_statistics(batch_times_ms: List[float]) -> Dict: +def calculate_statistics(batch_times_ms: List[float], total_run_time_ms: float) -> Dict: percentiles = [25.0, 50.0, 75.0, 90.0, 95.0, 99.0, 99.9] buckets = numpy.percentile(batch_times_ms, percentiles).tolist() percentiles_dict = { "{:2.1f}%".format(key): value for key, value in zip(percentiles, buckets) } - + benchmark_dict = { + "total_percentage": sum(batch_times_ms) / total_run_time_ms * 100, "median": numpy.median(batch_times_ms), "mean": numpy.mean(batch_times_ms), "std": numpy.std(batch_times_ms), @@ -368,6 +418,18 @@ def calculate_statistics(batch_times_ms: List[float]) -> Dict: } return benchmark_dict +def calculate_section_stats(batch_times: List[StagedTimer], total_run_time: float) -> Dict[str, Dict]: + compute_sections = batch_times[0].stages + total_run_time_ms = total_run_time * 1000 + + sections = {} + for section in compute_sections: + section_times = [st.times[section] * 1000 for st in batch_times] + sections[section] = calculate_statistics(section_times, total_run_time_ms) + + return sections + + def main(): args = parse_args() @@ -390,34 +452,16 @@ def main(): quiet=args.quiet, ) - pre_process_times = [st.times['pre_process'] * 1000 for st in batch_times] - pre_stats = calculate_statistics(pre_process_times) - post_process_times = [st.times['post_process'] * 1000 for st in batch_times] - post_stats = calculate_statistics(post_process_times) - engine_forward_times = [st.times['engine_forward'] * 1000 for st in batch_times] - forward_stats = calculate_statistics(engine_forward_times) - + section_stats = calculate_section_stats(batch_times, total_run_time) items_per_sec = (len(batch_times) * args.batch_size) / total_run_time - total_pre_process = sum(pre_process_times) - total_post_process = sum(post_process_times) - total_engine_forward = sum(engine_forward_times) - total_time = total_pre_process + total_post_process + total_engine_forward - percent_pre = total_pre_process / total_time * 100 - percent_post = total_post_process / total_time * 100 - percent_forward = total_engine_forward / total_time * 100 export_dict = { "scenario": args.scenario, "items_per_sec": items_per_sec, "seconds_ran": total_run_time, "iterations": len(batch_times), - "percent_pre": percent_pre, - "percent_post": percent_post, - "percent_forward": 
percent_forward, - "pre_stats": pre_stats, - "post_stats": post_stats, - "forward_stats": forward_stats + "compute_sections": section_stats } # Export results @@ -437,13 +481,15 @@ def main(): export_dict["items_per_sec"] ) ) + print("Processing Time Breakdown: ") - print(" Pre-Processing: {:.2f}%".format(export_dict["percent_pre"])) - print(" Post-Processing: {:.2f}%".format(export_dict["percent_post"])) - print(" Forward Pass: {:.2f}%".format(export_dict["percent_forward"])) - print("Pre-Processing Latency Mean (ms/batch): {:.4f}".format(export_dict["pre_stats"]["mean"])) - print("Post-Processing Latency Mean (ms/batch): {:.4f}".format(export_dict["post_stats"]["mean"])) - print("Forward Pass Latency Mean (ms/batch): {:.4f}".format(export_dict["forward_stats"]["mean"])) + compute_sections = batch_times[0].stages + for section in compute_sections: + print(" {}: {:.2f}%".format(section, section_stats[section]["total_percentage"])) + + print("Mean Latency Breakdown (ms/batch): ") + for section in compute_sections: + print(" {}: {:.4f}".format(section, section_stats[section]["mean"])) if __name__ == "__main__": main() From 76a5af942f1233a8c7c307725c06f50a1ef24e31 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Thu, 27 Jul 2023 15:50:54 -0400 Subject: [PATCH 10/37] expanding input schemas, allowing for kwargs --- .../benchmark/benchmark_pipeline.py | 115 +++++++++--------- 1 file changed, 59 insertions(+), 56 deletions(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index ac2ec44607..a62fa09688 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -45,6 +45,9 @@ DEEPSPARSE_ENGINE = "deepsparse" ORT_ENGINE = "onnxruntime" +DEFAULT_STRING_LENGTH = 50 +DEFAULT_IMAGE_SHAPE = (240, 240, 3) + def parse_args(): parser = argparse.ArgumentParser( @@ -212,22 +215,40 @@ def parse_input_config(input_config_file: str) -> Dict[str, any]: config_file.close() return config +def get_files_with_endings(folder:str, num_files: int, recursive: bool, file_endings: List[str]) -> List[str]: + files = [] + for f in glob.glob(folder + "/**", recursivere=recursive): + if f.lower().endswith(file_endings): + files.append(f) + if len(files) < num_files: + raise Exception("Not enough images found in {}".format(folder)) + return random.sample(files, num_files) + +def generate_sentence(string_length: int, avg_word_length: int = 5): + random_chars = ''.join(random.choices(string.ascii_letters, k=string_length)) + space_locations = random.sample(range(string_length), int(string_length / avg_word_length)) + random_chars = list(random_chars) + for loc in space_locations: + random_chars[loc] = ' ' + return ''.join(random_chars) + def get_input_schema_type(pipeline: Pipeline) -> str: - input_schema_requirements = list(pipeline.input_schema.__annotations__.keys()) - image_requirements = ["images"] - basic_text_requirements = ["sequences"] - question_requirements = ["question", "context", "id"] - text_generation_requirements = ["sequences", "return_logits", "session_id", "fixed_sequences_length"] + input_schema_requirements = list(pipeline.input_schema.__fields__.keys()) + input_schema_fields = pipeline.input_schema.__fields__ - if input_schema_requirements == image_requirements or "YOLO" in pipeline.input_schema.__name__: + if "images" in input_schema_requirements: return "image" - elif input_schema_requirements == basic_text_requirements: - return "text" - elif input_schema_requirements == 
question_requirements: + if "sequences" in input_schema_requirements: + sequence_types = [f.outer_type_ for f in input_schema_fields['sequences'].sub_fields] + if List[str] in sequence_types: + return "text_sequence" + elif "inputs" in input_schema_requirements: + sequence_types = [f.outer_type_ for f in input_schema_fields['inputs'].sub_fields] + if List[str] in sequence_types: + return "text_inputs" + elif "question" in input_schema_requirements: return "question" - elif input_schema_requirements == text_generation_requirements: - return "text_generation" - + raise Exception("Unknown schema requirement {}".format(input_schema_requirements)) def generate_image_data(config: Dict, batch_size: int) -> List[numpy.ndarray]: @@ -235,7 +256,7 @@ def generate_image_data(config: Dict, batch_size: int) -> List[numpy.ndarray]: if "input_image_shape" in config and len(config["input_image_shape"]) == 3: image_shape = config["input_image_shape"] else: - image_shape = (240, 240, 3) + image_shape = DEFAULT_IMAGE_SHAPE _LOGGER.warning("Using default image shape {}".format(image_shape)) for _ in range(batch_size): @@ -247,22 +268,14 @@ def generate_image_data(config: Dict, batch_size: int) -> List[numpy.ndarray]: def load_image_data(config: Dict, batch_size: int) -> List[str]: path_to_data = config["data_folder"] recursive_search = config["recursive_search"] - files = [] - for f in glob.glob(path_to_data + "/**", recursive=recursive_search): - if f.lower().endswith(".jpeg"): - files.append(f) - if len(files) < batch_size: - raise Exception("Not enough images found in {}".format(path_to_data)) - input_data = random.sample(files, batch_size) - - return input_data + return get_files_with_endings(path_to_data, batch_size, recursive_search, [".jpg", ".jpeg", ".gif"]) def generate_text_data(config: Dict, batch_size: int) -> List[str]: input_data = [] if 'gen_sequence_length' in config: string_length = config['gen_sequence_length'] else: - string_length = 100 + string_length = DEFAULT_STRING_LENGTH _LOGGER.warning("Using default string length {}".format(string_length)) for _ in range(batch_size): rand_sentence = generate_sentence(string_length) @@ -273,13 +286,7 @@ def generate_text_data(config: Dict, batch_size: int) -> List[str]: def load_text_data(config: Dict, batch_size: int) -> List[str]: path_to_data = config["data_folder"] recursive_search = config["recursive_search"] - files = [] - for f in glob.glob(path_to_data + "/**", recursive=recursive_search): - if f.lower().endswith(".txt"): - files.append(f) - if len(files) < batch_size: - raise Exception("Not enough images found in {}".format(path_to_data)) - input_files = random.sample(files, batch_size) + input_files = get_files_with_endings(path_to_data, batch_size, recursive_search, [".txt"]) if "max_string_length" in config: max_string_length = config["max_string_length"] else: @@ -293,19 +300,11 @@ def load_text_data(config: Dict, batch_size: int) -> List[str]: input_data.append(text_data[:max_string_length]) return input_data -def generate_sentence(string_length: int, avg_word_length: int = 5): - random_chars = ''.join(random.choices(string.ascii_letters, k=string_length)) - space_locations = random.sample(range(string_length), int(string_length / avg_word_length)) - random_chars = list(random_chars) - for loc in space_locations: - random_chars[loc] = ' ' - return ''.join(random_chars) - def generate_question_data(config: Dict) -> Tuple[str, str]: if 'gen_sequence_length' in config: string_length = config['gen_sequence_length'] else: - string_length = 
100 + string_length = DEFAULT_STRING_LENGTH _LOGGER.warning("Using default string length {}".format(string_length)) question = generate_sentence(string_length) context = generate_sentence(string_length) @@ -348,39 +347,43 @@ def benchmark_pipeline( config = parse_input_config(input_config) input_type = config["data_type"] - pipeline = Pipeline.create(task=task, model_path=model_path) + kwargs = {} + if "pipeline_kwargs" in config: + kwargs = config["pipeline_kwargs"] + pipeline = Pipeline.create(task=task, model_path=model_path, **kwargs) input_schema_requirement = get_input_schema_type(pipeline) + kwargs = {} + if "input_schema_kwargs" in config: + kwargs = config["input_schema_kwargs"] if input_type == "dummy": if input_schema_requirement == "image": input_data = generate_image_data(config, batch_size) - inputs = pipeline.input_schema(images=input_data) - elif input_schema_requirement == "text": + inputs = pipeline.input_schema(images=input_data, **kwargs) + elif input_schema_requirement == "text_sequence": + input_data = generate_text_data(config, batch_size) + inputs = pipeline.input_schema(sequences=input_data, **kwargs) + elif input_schema_requirement == "text_inputs": input_data = generate_text_data(config, batch_size) - inputs = pipeline.input_schema(sequences=input_data) + inputs = pipeline.input_schema(inputs=input_data, **kwargs) elif input_schema_requirement == "question": _LOGGER.warn("Only batch size of 1 supported for Question Answering Pipeline") question, context = generate_question_data(config) - inputs = pipeline.input_schema(question=question, context=context) - elif input_schema_requirement == "text_generation": - seqs = generate_text_data(config, batch_size) - fix_len = config["fix_sequence_length"] - inputs = pipeline.input_schema(sequences=seqs, return_logits=False, session_id=None, fixed_sequences_length=fix_len) + inputs = pipeline.input_schema(question=question, context=context, **kwargs) elif input_type == "real": if input_schema_requirement == "image": input_data = load_image_data(config, batch_size) - inputs = pipeline.input_schema(images=input_data) - elif input_schema_requirement == "text": + inputs = pipeline.input_schema(images=input_data, **kwargs) + elif input_schema_requirement == "text_sequence": + input_data = load_text_data(config, batch_size) + inputs = pipeline.input_schema(sequences=input_data, **kwargs) + elif input_schema_requirement == "text_inputs": input_data = load_text_data(config, batch_size) - inputs = pipeline.input_schema(sequences=input_data) + inputs = pipeline.input_schema(inputs=input_data, **kwargs) elif input_schema_requirement == "question": _LOGGER.warn("Only batch size of 1 supported for Question Answering Pipeline") question, context = load_question_data(config) - inputs = pipeline.input_schema(question=question, context=context) - elif input_schema_requirement == "text_generation": - seqs = load_text_data(config, batch_size) - fix_len = config["fix_sequence_length"] - inputs = pipeline.input_schema(sequences=seqs, return_logits=False, session_id=None, fixed_sequences_length=fix_len) + inputs = pipeline.input_schema(question=question, context=context, **kwargs) else: raise Exception(f"Unknown input type '{input_type}'") From 6cb6bef4a4e31c60e4258fb5e4725d155f3b16ca Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Fri, 28 Jul 2023 11:00:30 -0400 Subject: [PATCH 11/37] README, quality, additional args --- src/deepsparse/benchmark/README.md | 68 ++++++ src/deepsparse/benchmark/benchmark_model.py | 13 +- 
 .../benchmark/benchmark_pipeline.py | 206 +++++-------
 src/deepsparse/benchmark/helpers.py | 25 ++-
 4 files changed, 229 insertions(+), 83 deletions(-)

diff --git a/src/deepsparse/benchmark/README.md b/src/deepsparse/benchmark/README.md
index c67133744e..6bcb3477ec 100644
--- a/src/deepsparse/benchmark/README.md
+++ b/src/deepsparse/benchmark/README.md
@@ -186,4 +186,72 @@ Latency Mean (ms/batch): 16.0732
 Latency Median (ms/batch): 15.7850
 Latency Std (ms/batch): 1.0427
 Iterations: 622
+```
+
+## 📜 Benchmarking Pipelines
+Expanding on the model benchmarking script, the pipeline benchmarker is a tool for benchmarking end-to-end inference, including pre- and post-processing. The script can generate fake input data based on the pipeline's input schema, or load it from a local folder. The pipeline then runs pre-processing, engine inference, and post-processing. Benchmarking results are reported per section, which is useful for identifying bottlenecks.
+
+### Usage
+Input arguments are the same as for the Engine benchmarker, with two additions:
+
+```
+positional arguments:
+  task_name             Type of pipeline to run (e.g. "text_generation")
+
+optional arguments:
+  -c INPUT_CONFIG, --input_config INPUT_CONFIG
+                        JSON file containing schema for input data
+```
+
+The `input_config` argument is a path to a JSON file specifying details of the input schema to the pipeline, detailed below.
+
+### Configuring Pipeline Inputs
+
+Inputs to the pipeline are configured through a JSON config file. The `data_type` field should be set to `"dummy"` if passing randomly generated data through the pipeline, and `"real"` if passing in data from files.
+
+#### Dummy Input Configuration
+An example dummy input configuration is shown below.
+* `gen_sequence_length`: number of characters to generate for pipelines that take text input
+* `input_image_shape`: configures image size for pipelines that take image input; must be 3-dimensional with the channel as the last dimension
+
+```json
+{
+    "data_type": "dummy",
+    "gen_sequence_length": 100,
+    "input_image_shape": [500,500,3],
+    "pipeline_kwargs": {},
+    "input_schema_kwargs": {}
+}
+```
+
+#### Real Input Configuration
+An example real input configuration is shown below.
+* `data_folder`: path to a local folder of input data; should contain text or image files
+* `recursive_search`: whether to recursively search through `data_folder` for files
+* `max_string_length`: maximum characters to read from each file containing text data, -1 for no max length
+
+```json
+{
+    "data_type": "real",
+    "data_folder": "/home/sadkins/imagenette2-320/",
+    "recursive_search": true,
+    "max_string_length": -1,
+    "pipeline_kwargs": {},
+    "input_schema_kwargs": {}
+}
+```
+
+#### Keyword Arguments
+Additional arguments to the pipeline or input_schema can be added to the `pipeline_kwargs` and `input_schema_kwargs` fields respectively.
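+Both fields are forwarded unchanged as keyword arguments: `pipeline_kwargs` to `Pipeline.create` and `input_schema_kwargs` to the pipeline's input schema constructor. A minimal sketch of that behavior is shown below (a simplified illustration, not the benchmarker itself; the task, SparseZoo stub, and dummy image batch are example values, and engine/scheduler arguments are omitted):
+
+```python
+import json
+
+import numpy
+from deepsparse import Pipeline
+
+config = json.load(open("config.json"))
+
+# a dummy batch of one image, similar to what the benchmarker generates for image pipelines
+input_data = [numpy.random.randint(0, 255, size=(240, 240, 3)).astype(numpy.uint8)]
+
+# extra kwargs forwarded to the Pipeline constructor
+pipeline = Pipeline.create(
+    task="image_classification",
+    model_path="zoo:cv/classification/resnet_v1-50_2x/pytorch/sparseml/imagenet/base-none",
+    **config["pipeline_kwargs"],
+)
+
+# extra kwargs forwarded to the input schema object that is passed to the pipeline
+inputs = pipeline.input_schema(images=input_data, **config["input_schema_kwargs"])
+output = pipeline(inputs)
+```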
For instance, to pass class_names to a YOLO pipeline and conf_thres to the input schema +```json +{ + "data_type": "dummy", + "input_image_shape": [500,500,3], + "pipeline_kwargs": { + "class_names": ["classA", "classB"] + }, + "input_schema_kwargs": { + "conf_thres": 0.7 + } +} ``` \ No newline at end of file diff --git a/src/deepsparse/benchmark/benchmark_model.py b/src/deepsparse/benchmark/benchmark_model.py index ea280cc809..04fcdb8c7a 100644 --- a/src/deepsparse/benchmark/benchmark_model.py +++ b/src/deepsparse/benchmark/benchmark_model.py @@ -95,10 +95,15 @@ import importlib import json import logging -import os from typing import Dict from deepsparse import __version__, compile_model +from deepsparse.benchmark.helpers import ( + decide_thread_pinning, + parse_num_streams, + parse_scenario, + parse_scheduler, +) from deepsparse.benchmark.ort_engine import ORTEngine from deepsparse.benchmark.stream_benchmark import model_stream_benchmark from deepsparse.cpu import cpu_architecture @@ -109,12 +114,6 @@ override_onnx_input_shapes, parse_input_shapes, ) -from deepsparse.benchmark.helpers import ( - decide_thread_pinning, - parse_scheduler, - parse_scenario, - parse_num_streams -) __all__ = ["benchmark_model"] diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index a62fa09688..a791b11853 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -13,28 +13,28 @@ # limitations under the License. import argparse +import glob import json -import string import logging +import queue import random -from typing import Dict, List, Tuple +import string +import threading import time +from typing import Dict, List, Tuple + import numpy -import threading -import queue -import glob -from deepsparse import __version__ from deepsparse import Pipeline -from deepsparse.cpu import cpu_architecture -from deepsparse.log import set_logging_level -from deepsparse.utils.timer import StagedTimer from deepsparse.benchmark.helpers import ( decide_thread_pinning, - parse_scheduler, + parse_num_streams, parse_scenario, - parse_num_streams + parse_scheduler, ) +from deepsparse.cpu import cpu_architecture +from deepsparse.log import set_logging_level +from deepsparse.utils.timer import StagedTimer __all__ = ["benchmark_pipeline"] @@ -50,14 +50,8 @@ def parse_args(): - parser = argparse.ArgumentParser( - description="Benchmark DeepSparse Pipelines" - ) - parser.add_argument( - "task_name", - type=str, - help="Type of pipeline to run" - ) + parser = argparse.ArgumentParser(description="Benchmark DeepSparse Pipelines") + parser.add_argument("task_name", type=str, help="Type of pipeline to run") parser.add_argument( "model_path", type=str, @@ -68,15 +62,7 @@ def parse_args(): "--input_config", type=str, default="config.json", - help="JSON file containing schema for input data" - ) - parser.add_argument( - "-i", - "--input_type", - type=str, - default="dummy", - choices=["dummy", "real"], - help="Type of input data to use, real or randomly generated" + help="JSON file containing schema for input data", ) parser.add_argument( "-b", @@ -115,6 +101,16 @@ def parse_args(): default=10, help="The number of seconds the benchmark will run. Default is 10 seconds.", ) + parser.add_argument( + "-w", + "--warmup_time", + type=int, + default=2, + help=( + "The number of seconds the benchmark will warmup before running." + "Default is 2 seconds." 
+ ), + ) parser.add_argument( "-nstreams", "--num_streams", @@ -137,6 +133,19 @@ def parse_args(): "threads to cores on sockets ('numa'), or disable ('none')" ), ) + parser.add_argument( + "-e", + "--engine", + type=str, + default=DEEPSPARSE_ENGINE, + help=( + "Inference engine backend to run eval on. Choices are 'deepsparse', " + "'onnxruntime'. Default is 'deepsparse'. Can also specify a user " + "defined engine class by giving the script and class name in the " + "following format :. This " + "engine class will be dynamically imported during runtime" + ), + ) parser.add_argument( "-q", "--quiet", @@ -154,13 +163,14 @@ def parse_args(): return parser.parse_args() + class PipelineExecutorThread(threading.Thread): def __init__( self, pipeline: Pipeline, inputs: List[any], time_queue: queue.Queue, - max_time: float + max_time: float, ): super(PipelineExecutorThread, self).__init__() self._pipeline = pipeline @@ -170,23 +180,22 @@ def __init__( def run(self): while time.perf_counter() < self._max_time: - output = self._pipeline(self._inputs) + _ = self._pipeline(self._inputs) self._time_queue.put(self._pipeline.timer_manager.latest) def singlestream_benchmark( - pipeline: Pipeline, - inputs: List[any], - seconds_to_run: float + pipeline: Pipeline, inputs: List[any], seconds_to_run: float ) -> List[StagedTimer]: benchmark_end_time = time.perf_counter() + seconds_to_run batch_timings = [] while time.perf_counter() < benchmark_end_time: - output = pipeline(inputs) + _ = pipeline(inputs) batch_timings.append(pipeline.timer_manager.latest) return batch_timings + def multistream_benchmark( pipeline: Pipeline, inputs: List[any], @@ -215,7 +224,10 @@ def parse_input_config(input_config_file: str) -> Dict[str, any]: config_file.close() return config -def get_files_with_endings(folder:str, num_files: int, recursive: bool, file_endings: List[str]) -> List[str]: + +def get_files_with_endings( + folder: str, num_files: int, recursive: bool, file_endings: List[str] +) -> List[str]: files = [] for f in glob.glob(folder + "/**", recursivere=recursive): if f.lower().endswith(file_endings): @@ -224,13 +236,17 @@ def get_files_with_endings(folder:str, num_files: int, recursive: bool, file_end raise Exception("Not enough images found in {}".format(folder)) return random.sample(files, num_files) + def generate_sentence(string_length: int, avg_word_length: int = 5): - random_chars = ''.join(random.choices(string.ascii_letters, k=string_length)) - space_locations = random.sample(range(string_length), int(string_length / avg_word_length)) + random_chars = "".join(random.choices(string.ascii_letters, k=string_length)) + space_locations = random.sample( + range(string_length), int(string_length / avg_word_length) + ) random_chars = list(random_chars) for loc in space_locations: - random_chars[loc] = ' ' - return ''.join(random_chars) + random_chars[loc] = " " + return "".join(random_chars) + def get_input_schema_type(pipeline: Pipeline) -> str: input_schema_requirements = list(pipeline.input_schema.__fields__.keys()) @@ -239,18 +255,23 @@ def get_input_schema_type(pipeline: Pipeline) -> str: if "images" in input_schema_requirements: return "image" if "sequences" in input_schema_requirements: - sequence_types = [f.outer_type_ for f in input_schema_fields['sequences'].sub_fields] + sequence_types = [ + f.outer_type_ for f in input_schema_fields["sequences"].sub_fields + ] if List[str] in sequence_types: return "text_sequence" elif "inputs" in input_schema_requirements: - sequence_types = [f.outer_type_ for f in 
input_schema_fields['inputs'].sub_fields] + sequence_types = [ + f.outer_type_ for f in input_schema_fields["inputs"].sub_fields + ] if List[str] in sequence_types: return "text_inputs" elif "question" in input_schema_requirements: return "question" - + raise Exception("Unknown schema requirement {}".format(input_schema_requirements)) + def generate_image_data(config: Dict, batch_size: int) -> List[numpy.ndarray]: input_data = [] if "input_image_shape" in config and len(config["input_image_shape"]) == 3: @@ -260,33 +281,42 @@ def generate_image_data(config: Dict, batch_size: int) -> List[numpy.ndarray]: _LOGGER.warning("Using default image shape {}".format(image_shape)) for _ in range(batch_size): - rand_array = numpy.random.randint(0,high=255, size=image_shape).astype(numpy.uint8) + rand_array = numpy.random.randint(0, high=255, size=image_shape).astype( + numpy.uint8 + ) input_data.append(rand_array) return input_data + def load_image_data(config: Dict, batch_size: int) -> List[str]: path_to_data = config["data_folder"] recursive_search = config["recursive_search"] - return get_files_with_endings(path_to_data, batch_size, recursive_search, [".jpg", ".jpeg", ".gif"]) + return get_files_with_endings( + path_to_data, batch_size, recursive_search, [".jpg", ".jpeg", ".gif"] + ) + def generate_text_data(config: Dict, batch_size: int) -> List[str]: input_data = [] - if 'gen_sequence_length' in config: - string_length = config['gen_sequence_length'] + if "gen_sequence_length" in config: + string_length = config["gen_sequence_length"] else: string_length = DEFAULT_STRING_LENGTH _LOGGER.warning("Using default string length {}".format(string_length)) for _ in range(batch_size): rand_sentence = generate_sentence(string_length) input_data.append(rand_sentence) - + return input_data + def load_text_data(config: Dict, batch_size: int) -> List[str]: path_to_data = config["data_folder"] recursive_search = config["recursive_search"] - input_files = get_files_with_endings(path_to_data, batch_size, recursive_search, [".txt"]) + input_files = get_files_with_endings( + path_to_data, batch_size, recursive_search, [".txt"] + ) if "max_string_length" in config: max_string_length = config["max_string_length"] else: @@ -300,9 +330,10 @@ def load_text_data(config: Dict, batch_size: int) -> List[str]: input_data.append(text_data[:max_string_length]) return input_data + def generate_question_data(config: Dict) -> Tuple[str, str]: - if 'gen_sequence_length' in config: - string_length = config['gen_sequence_length'] + if "gen_sequence_length" in config: + string_length = config["gen_sequence_length"] else: string_length = DEFAULT_STRING_LENGTH _LOGGER.warning("Using default string length {}".format(string_length)) @@ -310,6 +341,7 @@ def generate_question_data(config: Dict) -> Tuple[str, str]: context = generate_sentence(string_length) return (question, context) + def load_question_data(config: Dict) -> Tuple[str, str]: path_to_questions = config["question_file"] path_to_context = config["context_file"] @@ -322,6 +354,7 @@ def load_question_data(config: Dict) -> Tuple[str, str]: f_context.close() return question, context + def benchmark_pipeline( model_path: str, task: str, @@ -330,11 +363,13 @@ def benchmark_pipeline( num_cores: int = None, scenario: str = "sync", seconds_to_run: int = 10, + warmup_time: int = 2, num_streams: int = None, thread_pinning: str = "core", + engine: str = DEEPSPARSE_ENGINE, quiet: bool = False, -) -> Tuple[List[StagedTimer],float] : - +) -> Tuple[List[StagedTimer], float]: + if quiet: 
set_logging_level(logging.WARN) @@ -343,14 +378,22 @@ def benchmark_pipeline( decide_thread_pinning(thread_pinning, _LOGGER) scenario = parse_scenario(scenario.lower(), _LOGGER) + scheduler = parse_scheduler(scenario) num_streams = parse_num_streams(num_streams, num_cores, scenario, _LOGGER) - + config = parse_input_config(input_config) input_type = config["data_type"] kwargs = {} if "pipeline_kwargs" in config: kwargs = config["pipeline_kwargs"] - pipeline = Pipeline.create(task=task, model_path=model_path, **kwargs) + pipeline = Pipeline.create( + task=task, + model_path=model_path, + engine_type=engine, + scheduler=scheduler, + num_cores=num_cores, + **kwargs, + ) input_schema_requirement = get_input_schema_type(pipeline) kwargs = {} if "input_schema_kwargs" in config: @@ -367,7 +410,9 @@ def benchmark_pipeline( input_data = generate_text_data(config, batch_size) inputs = pipeline.input_schema(inputs=input_data, **kwargs) elif input_schema_requirement == "question": - _LOGGER.warn("Only batch size of 1 supported for Question Answering Pipeline") + _LOGGER.warn( + "Only batch size of 1 supported for Question Answering Pipeline" + ) question, context = generate_question_data(config) inputs = pipeline.input_schema(question=question, context=context, **kwargs) elif input_type == "real": @@ -381,37 +426,48 @@ def benchmark_pipeline( input_data = load_text_data(config, batch_size) inputs = pipeline.input_schema(inputs=input_data, **kwargs) elif input_schema_requirement == "question": - _LOGGER.warn("Only batch size of 1 supported for Question Answering Pipeline") + _LOGGER.warn( + "Only batch size of 1 supported for Question Answering Pipeline" + ) question, context = load_question_data(config) inputs = pipeline.input_schema(question=question, context=context, **kwargs) else: raise Exception(f"Unknown input type '{input_type}'") - start_time = time.perf_counter() if scenario == "singlestream": + singlestream_benchmark(pipeline, inputs, warmup_time) batch_times = singlestream_benchmark(pipeline, inputs, seconds_to_run) elif scenario == "multistream": - batch_times = multistream_benchmark(pipeline, inputs, seconds_to_run, num_streams) + multistream_benchmark(pipeline, inputs, warmup_time, num_streams) + batch_times = multistream_benchmark( + pipeline, inputs, seconds_to_run, num_streams + ) elif scenario == "elastic": - batch_times = multistream_benchmark(pipeline, inputs, seconds_to_run, num_streams) + multistream_benchmark(pipeline, inputs, warmup_time, num_streams) + batch_times = multistream_benchmark( + pipeline, inputs, seconds_to_run, num_streams + ) else: raise Exception(f"Unknown scenario '{scenario}'") end_time = time.perf_counter() total_run_time = end_time - start_time if len(batch_times) == 0: - raise Exception("Generated no batch timings, try extending benchmark time with '--time'") + raise Exception( + "Generated no batch timings, try extending benchmark time with '--time'" + ) return batch_times, total_run_time + def calculate_statistics(batch_times_ms: List[float], total_run_time_ms: float) -> Dict: percentiles = [25.0, 50.0, 75.0, 90.0, 95.0, 99.0, 99.9] buckets = numpy.percentile(batch_times_ms, percentiles).tolist() percentiles_dict = { "{:2.1f}%".format(key): value for key, value in zip(percentiles, buckets) } - + benchmark_dict = { "total_percentage": sum(batch_times_ms) / total_run_time_ms * 100, "median": numpy.median(batch_times_ms), @@ -421,7 +477,10 @@ def calculate_statistics(batch_times_ms: List[float], total_run_time_ms: float) } return benchmark_dict -def 
calculate_section_stats(batch_times: List[StagedTimer], total_run_time: float) -> Dict[str, Dict]: + +def calculate_section_stats( + batch_times: List[StagedTimer], total_run_time: float +) -> Dict[str, Dict]: compute_sections = batch_times[0].stages total_run_time_ms = total_run_time * 1000 @@ -438,33 +497,33 @@ def main(): print("Original Model Path: {}".format(args.model_path)) print("Task: {}".format(args.task_name)) - print("Input Type: {}".format(args.input_type)) print("Batch Size: {}".format(args.batch_size)) print("Scenario: {}".format(args.scenario)) batch_times, total_run_time = benchmark_pipeline( model_path=args.model_path, task=args.task_name, - input_config = args.input_config, + input_config=args.input_config, batch_size=args.batch_size, num_cores=args.num_cores, scenario=args.scenario, seconds_to_run=args.time, + warmup_time=args.warmup_time, num_streams=args.num_streams, thread_pinning=args.thread_pinning, + engine=args.engine, quiet=args.quiet, ) section_stats = calculate_section_stats(batch_times, total_run_time) items_per_sec = (len(batch_times) * args.batch_size) / total_run_time - export_dict = { "scenario": args.scenario, "items_per_sec": items_per_sec, "seconds_ran": total_run_time, "iterations": len(batch_times), - "compute_sections": section_stats + "compute_sections": section_stats, } # Export results @@ -479,20 +538,21 @@ def main(): print("Batch Size: {}".format(args.batch_size)) print("Scenario: {}".format(args.scenario)) print("Iterations: {}".format(int(export_dict["iterations"]))) - print( - "Throughput (items/sec): {:.4f}".format( - export_dict["items_per_sec"] - ) - ) + print("Throughput (items/sec): {:.4f}".format(export_dict["items_per_sec"])) print("Processing Time Breakdown: ") compute_sections = batch_times[0].stages for section in compute_sections: - print(" {}: {:.2f}%".format(section, section_stats[section]["total_percentage"])) - + print( + " {}: {:.2f}%".format( + section, section_stats[section]["total_percentage"] + ) + ) + print("Mean Latency Breakdown (ms/batch): ") for section in compute_sections: print(" {}: {:.4f}".format(section, section_stats[section]["mean"])) + if __name__ == "__main__": main() diff --git a/src/deepsparse/benchmark/helpers.py b/src/deepsparse/benchmark/helpers.py index 14c90653a6..d0ccb95295 100644 --- a/src/deepsparse/benchmark/helpers.py +++ b/src/deepsparse/benchmark/helpers.py @@ -1,14 +1,30 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
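+
+"""
+Shared helpers for the benchmark scripts: thread pinning, scheduler selection,
+and scenario / stream-count parsing.
+"""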
+ import os from deepsparse import Scheduler + __all__ = [ "decide_thread_pinning", "parse_scheduler", "parse_scenario", - "parse_num_streams" + "parse_num_streams", ] + def decide_thread_pinning(pinning_mode: str, logger: object) -> None: pinning_mode = pinning_mode.lower() if pinning_mode in "core": @@ -29,6 +45,7 @@ def decide_thread_pinning(pinning_mode: str, logger: object) -> None: ) ) + def parse_scheduler(scenario: str) -> Scheduler: scenario = scenario.lower() if scenario == "multistream": @@ -39,7 +56,8 @@ def parse_scheduler(scenario: str) -> Scheduler: return Scheduler.elastic else: return Scheduler.multi_stream - + + def parse_scenario(scenario: str, logger: object) -> str: scenario = scenario.lower() if scenario == "async": @@ -55,7 +73,8 @@ def parse_scenario(scenario: str, logger: object) -> str: ) ) return "multistream" - + + def parse_num_streams(num_streams: int, num_cores: int, scenario: str, logger: object): # If model.num_streams is set, and the scenario is either "multi_stream" or # "elastic", use the value of num_streams given to us by the model, otherwise From 75f5173a46d7196785f6be76e7cacab3cd0289a1 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Fri, 28 Jul 2023 11:40:23 -0400 Subject: [PATCH 12/37] moving code around, update README --- src/deepsparse/benchmark/README.md | 12 + .../benchmark/benchmark_pipeline.py | 262 +++++------------- src/deepsparse/benchmark/data_creation.py | 170 ++++++++++++ src/deepsparse/benchmark/helpers.py | 13 + 4 files changed, 271 insertions(+), 186 deletions(-) create mode 100644 src/deepsparse/benchmark/data_creation.py diff --git a/src/deepsparse/benchmark/README.md b/src/deepsparse/benchmark/README.md index 6bcb3477ec..f530c0255b 100644 --- a/src/deepsparse/benchmark/README.md +++ b/src/deepsparse/benchmark/README.md @@ -254,4 +254,16 @@ Additional arguments to the pipeline or input_schema can be added to the `pipeli "conf_thres": 0.7 } } +``` + +### Example Usage + +Running image classification for 30 seconds with a batch size of 32: +``` +python benchmark_pipeline.py image_classification zoo:cv/classification/resnet_v1-50_2x/pytorch/sparseml/imagenet/base-none -c config.json -t 60 -b 32 +``` + +Running text generation for 30 seconds asynchronously +``` +python benchmark_pipeline.py text_generation image_classification zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/pruned50-none -c config.json -t 30 -s async ``` \ No newline at end of file diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index a791b11853..1ff586253b 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -13,21 +13,28 @@ # limitations under the License. 
import argparse -import glob import json import logging import queue -import random -import string import threading import time from typing import Dict, List, Tuple import numpy -from deepsparse import Pipeline +from deepsparse import Pipeline, __version__ +from deepsparse.benchmark.data_creation import ( + generate_image_data, + generate_question_data, + generate_text_data, + get_input_schema_type, + load_image_data, + load_question_data, + load_text_data, +) from deepsparse.benchmark.helpers import ( decide_thread_pinning, + parse_input_config, parse_num_streams, parse_scenario, parse_scheduler, @@ -45,9 +52,6 @@ DEEPSPARSE_ENGINE = "deepsparse" ORT_ENGINE = "onnxruntime" -DEFAULT_STRING_LENGTH = 50 -DEFAULT_IMAGE_SHAPE = (240, 240, 3) - def parse_args(): parser = argparse.ArgumentParser(description="Benchmark DeepSparse Pipelines") @@ -218,147 +222,56 @@ def multistream_benchmark( return list(time_queue.queue) -def parse_input_config(input_config_file: str) -> Dict[str, any]: - config_file = open(input_config_file) - config = json.load(config_file) - config_file.close() - return config - - -def get_files_with_endings( - folder: str, num_files: int, recursive: bool, file_endings: List[str] -) -> List[str]: - files = [] - for f in glob.glob(folder + "/**", recursivere=recursive): - if f.lower().endswith(file_endings): - files.append(f) - if len(files) < num_files: - raise Exception("Not enough images found in {}".format(folder)) - return random.sample(files, num_files) - - -def generate_sentence(string_length: int, avg_word_length: int = 5): - random_chars = "".join(random.choices(string.ascii_letters, k=string_length)) - space_locations = random.sample( - range(string_length), int(string_length / avg_word_length) - ) - random_chars = list(random_chars) - for loc in space_locations: - random_chars[loc] = " " - return "".join(random_chars) - - -def get_input_schema_type(pipeline: Pipeline) -> str: - input_schema_requirements = list(pipeline.input_schema.__fields__.keys()) - input_schema_fields = pipeline.input_schema.__fields__ - - if "images" in input_schema_requirements: - return "image" - if "sequences" in input_schema_requirements: - sequence_types = [ - f.outer_type_ for f in input_schema_fields["sequences"].sub_fields - ] - if List[str] in sequence_types: - return "text_sequence" - elif "inputs" in input_schema_requirements: - sequence_types = [ - f.outer_type_ for f in input_schema_fields["inputs"].sub_fields - ] - if List[str] in sequence_types: - return "text_inputs" - elif "question" in input_schema_requirements: - return "question" - - raise Exception("Unknown schema requirement {}".format(input_schema_requirements)) - - -def generate_image_data(config: Dict, batch_size: int) -> List[numpy.ndarray]: - input_data = [] - if "input_image_shape" in config and len(config["input_image_shape"]) == 3: - image_shape = config["input_image_shape"] - else: - image_shape = DEFAULT_IMAGE_SHAPE - _LOGGER.warning("Using default image shape {}".format(image_shape)) - - for _ in range(batch_size): - rand_array = numpy.random.randint(0, high=255, size=image_shape).astype( - numpy.uint8 - ) - input_data.append(rand_array) - - return input_data - - -def load_image_data(config: Dict, batch_size: int) -> List[str]: - path_to_data = config["data_folder"] - recursive_search = config["recursive_search"] - return get_files_with_endings( - path_to_data, batch_size, recursive_search, [".jpg", ".jpeg", ".gif"] - ) - - -def generate_text_data(config: Dict, batch_size: int) -> List[str]: - input_data = [] - if 
"gen_sequence_length" in config: - string_length = config["gen_sequence_length"] - else: - string_length = DEFAULT_STRING_LENGTH - _LOGGER.warning("Using default string length {}".format(string_length)) - for _ in range(batch_size): - rand_sentence = generate_sentence(string_length) - input_data.append(rand_sentence) - - return input_data - +def create_input_schema( + pipeline: Pipeline, input_type: str, batch_size: int, config: Dict +) -> any: + input_schema_requirement = get_input_schema_type(pipeline) + kwargs = {} + if "input_schema_kwargs" in config: + kwargs = config["input_schema_kwargs"] -def load_text_data(config: Dict, batch_size: int) -> List[str]: - path_to_data = config["data_folder"] - recursive_search = config["recursive_search"] - input_files = get_files_with_endings( - path_to_data, batch_size, recursive_search, [".txt"] - ) - if "max_string_length" in config: - max_string_length = config["max_string_length"] - else: - max_string_length = -1 - _LOGGER.warning("Using default max string length {}".format(max_string_length)) - input_data = [] - for f_path in input_files: - f = open(f_path) - text_data = f.read() - f.close() - input_data.append(text_data[:max_string_length]) - return input_data - - -def generate_question_data(config: Dict) -> Tuple[str, str]: - if "gen_sequence_length" in config: - string_length = config["gen_sequence_length"] + if input_type == "dummy": + if input_schema_requirement == "image": + input_data = generate_image_data(config, batch_size, _LOGGER) + inputs = pipeline.input_schema(images=input_data, **kwargs) + elif input_schema_requirement == "text_sequence": + input_data = generate_text_data(config, batch_size, _LOGGER) + inputs = pipeline.input_schema(sequences=input_data, **kwargs) + elif input_schema_requirement == "text_inputs": + input_data = generate_text_data(config, batch_size, _LOGGER) + inputs = pipeline.input_schema(inputs=input_data, **kwargs) + elif input_schema_requirement == "question": + _LOGGER.warn( + "Only batch size of 1 supported for Question Answering Pipeline" + ) + question, context = generate_question_data(config, _LOGGER) + inputs = pipeline.input_schema(question=question, context=context, **kwargs) + elif input_type == "real": + if input_schema_requirement == "image": + input_data = load_image_data(config, batch_size) + inputs = pipeline.input_schema(images=input_data, **kwargs) + elif input_schema_requirement == "text_sequence": + input_data = load_text_data(config, _LOGGER) + inputs = pipeline.input_schema(sequences=input_data, **kwargs) + elif input_schema_requirement == "text_inputs": + input_data = load_text_data(config, batch_size, _LOGGER) + inputs = pipeline.input_schema(inputs=input_data, **kwargs) + elif input_schema_requirement == "question": + _LOGGER.warn( + "Only batch size of 1 supported for Question Answering Pipeline" + ) + question, context = load_question_data(config) + inputs = pipeline.input_schema(question=question, context=context, **kwargs) else: - string_length = DEFAULT_STRING_LENGTH - _LOGGER.warning("Using default string length {}".format(string_length)) - question = generate_sentence(string_length) - context = generate_sentence(string_length) - return (question, context) - - -def load_question_data(config: Dict) -> Tuple[str, str]: - path_to_questions = config["question_file"] - path_to_context = config["context_file"] + raise Exception(f"Unknown input type '{input_type}'") - f_question = open(path_to_questions) - f_context = open(path_to_context) - question = f_question.read() - context = 
f_context.read() - f_question.close() - f_context.close() - return question, context + return inputs def benchmark_pipeline( model_path: str, task: str, - input_config: str, + config: Dict, batch_size: int = 1, num_cores: int = None, scenario: str = "sync", @@ -381,7 +294,6 @@ def benchmark_pipeline( scheduler = parse_scheduler(scenario) num_streams = parse_num_streams(num_streams, num_cores, scenario, _LOGGER) - config = parse_input_config(input_config) input_type = config["data_type"] kwargs = {} if "pipeline_kwargs" in config: @@ -394,57 +306,21 @@ def benchmark_pipeline( num_cores=num_cores, **kwargs, ) - input_schema_requirement = get_input_schema_type(pipeline) - kwargs = {} - if "input_schema_kwargs" in config: - kwargs = config["input_schema_kwargs"] - - if input_type == "dummy": - if input_schema_requirement == "image": - input_data = generate_image_data(config, batch_size) - inputs = pipeline.input_schema(images=input_data, **kwargs) - elif input_schema_requirement == "text_sequence": - input_data = generate_text_data(config, batch_size) - inputs = pipeline.input_schema(sequences=input_data, **kwargs) - elif input_schema_requirement == "text_inputs": - input_data = generate_text_data(config, batch_size) - inputs = pipeline.input_schema(inputs=input_data, **kwargs) - elif input_schema_requirement == "question": - _LOGGER.warn( - "Only batch size of 1 supported for Question Answering Pipeline" - ) - question, context = generate_question_data(config) - inputs = pipeline.input_schema(question=question, context=context, **kwargs) - elif input_type == "real": - if input_schema_requirement == "image": - input_data = load_image_data(config, batch_size) - inputs = pipeline.input_schema(images=input_data, **kwargs) - elif input_schema_requirement == "text_sequence": - input_data = load_text_data(config, batch_size) - inputs = pipeline.input_schema(sequences=input_data, **kwargs) - elif input_schema_requirement == "text_inputs": - input_data = load_text_data(config, batch_size) - inputs = pipeline.input_schema(inputs=input_data, **kwargs) - elif input_schema_requirement == "question": - _LOGGER.warn( - "Only batch size of 1 supported for Question Answering Pipeline" - ) - question, context = load_question_data(config) - inputs = pipeline.input_schema(question=question, context=context, **kwargs) - else: - raise Exception(f"Unknown input type '{input_type}'") + inputs = create_input_schema(pipeline, input_type, batch_size, config) - start_time = time.perf_counter() if scenario == "singlestream": singlestream_benchmark(pipeline, inputs, warmup_time) + start_time = time.perf_counter() batch_times = singlestream_benchmark(pipeline, inputs, seconds_to_run) elif scenario == "multistream": multistream_benchmark(pipeline, inputs, warmup_time, num_streams) + start_time = time.perf_counter() batch_times = multistream_benchmark( pipeline, inputs, seconds_to_run, num_streams ) elif scenario == "elastic": multistream_benchmark(pipeline, inputs, warmup_time, num_streams) + start_time = time.perf_counter() batch_times = multistream_benchmark( pipeline, inputs, seconds_to_run, num_streams ) @@ -494,6 +370,7 @@ def calculate_section_stats( def main(): args = parse_args() + config = parse_input_config(args.input_config) print("Original Model Path: {}".format(args.model_path)) print("Task: {}".format(args.task_name)) @@ -503,7 +380,7 @@ def main(): batch_times, total_run_time = benchmark_pipeline( model_path=args.model_path, task=args.task_name, - input_config=args.input_config, + config=config, 
batch_size=args.batch_size, num_cores=args.num_cores, scenario=args.scenario, @@ -518,14 +395,26 @@ def main(): section_stats = calculate_section_stats(batch_times, total_run_time) items_per_sec = (len(batch_times) * args.batch_size) / total_run_time - export_dict = { - "scenario": args.scenario, + benchmark_results = { "items_per_sec": items_per_sec, "seconds_ran": total_run_time, "iterations": len(batch_times), "compute_sections": section_stats, } + export_dict = { + "engine": args.engine, + "version": __version__, + "model_path": args.model_path, + "batch_size": args.batch_size, + "num_cores": args.num_cores, + "scenario": args.scenario, + "seconds_to_run": time, + "num_streams": args.num_streams, + "input_config": config, + "benchmark_results": benchmark_results, + } + # Export results export_path = args.export_path if export_path: @@ -537,8 +426,9 @@ def main(): print("Original Model Path: {}".format(args.model_path)) print("Batch Size: {}".format(args.batch_size)) print("Scenario: {}".format(args.scenario)) - print("Iterations: {}".format(int(export_dict["iterations"]))) - print("Throughput (items/sec): {:.4f}".format(export_dict["items_per_sec"])) + print("Iterations: {}".format(int(benchmark_results["iterations"]))) + print("Total Runtime: {:.4f}".format(total_run_time)) + print("Throughput (items/sec): {:.4f}".format(benchmark_results["items_per_sec"])) print("Processing Time Breakdown: ") compute_sections = batch_times[0].stages diff --git a/src/deepsparse/benchmark/data_creation.py b/src/deepsparse/benchmark/data_creation.py new file mode 100644 index 0000000000..6b1036e98e --- /dev/null +++ b/src/deepsparse/benchmark/data_creation.py @@ -0,0 +1,170 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
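For reference, a results file written via `--export_path` mirrors the `export_dict` assembled above and could be inspected with a short script along these lines. This is only a sketch: `results.json` is an assumed example path, not a default of the tool, and the keys are taken from the dictionary built in `main()`.

```
import json

# Load a results file previously produced with `-x results.json`
# ("results.json" is an illustrative path, not a fixed default).
with open("results.json") as f:
    results = json.load(f)

print("scenario:", results["scenario"])
print("items/sec:", results["benchmark_results"]["items_per_sec"])
for section, stats in results["benchmark_results"]["compute_sections"].items():
    # Each section carries total_percentage, mean, median, std and percentile buckets
    print(section, stats["mean"], stats["total_percentage"])
```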
+ +import glob +import random +import string +from typing import Dict, List, Tuple + +import numpy + +from deepsparse import Pipeline + + +DEFAULT_STRING_LENGTH = 50 +DEFAULT_IMAGE_SHAPE = (240, 240, 3) + +__all__ = [ + "get_input_schema_type", + "get_files_with_endings", + "generate_sentence", + "generate_image_data", + "load_image_data", + "generate_text_data", + "load_text_data", + "generate_question_data", + "load_question_data", +] + + +def get_input_schema_type(pipeline: Pipeline) -> str: + input_schema_requirements = list(pipeline.input_schema.__fields__.keys()) + input_schema_fields = pipeline.input_schema.__fields__ + + if "images" in input_schema_requirements: + return "image" + if "sequences" in input_schema_requirements: + sequence_types = [ + f.outer_type_ for f in input_schema_fields["sequences"].sub_fields + ] + if List[str] in sequence_types: + return "text_sequence" + elif "inputs" in input_schema_requirements: + sequence_types = [ + f.outer_type_ for f in input_schema_fields["inputs"].sub_fields + ] + if List[str] in sequence_types: + return "text_inputs" + elif "question" in input_schema_requirements: + return "question" + + raise Exception("Unknown schema requirement {}".format(input_schema_requirements)) + + +def get_files_with_endings( + folder: str, num_files: int, recursive: bool, file_endings: List[str] +) -> List[str]: + files = [] + for f in glob.glob(folder + "/**", recursivere=recursive): + if f.lower().endswith(file_endings): + files.append(f) + if len(files) < num_files: + raise Exception("Not enough images found in {}".format(folder)) + return random.sample(files, num_files) + + +def generate_sentence(string_length: int, avg_word_length: int = 5): + random_chars = "".join(random.choices(string.ascii_letters, k=string_length)) + space_locations = random.sample( + range(string_length), int(string_length / avg_word_length) + ) + random_chars = list(random_chars) + for loc in space_locations: + random_chars[loc] = " " + return "".join(random_chars) + + +def generate_image_data( + config: Dict, batch_size: int, logger: object +) -> List[numpy.ndarray]: + input_data = [] + if "input_image_shape" in config and len(config["input_image_shape"]) == 3: + image_shape = config["input_image_shape"] + else: + image_shape = DEFAULT_IMAGE_SHAPE + logger.warning("Using default image shape {}".format(image_shape)) + + for _ in range(batch_size): + rand_array = numpy.random.randint(0, high=255, size=image_shape).astype( + numpy.uint8 + ) + input_data.append(rand_array) + + return input_data + + +def load_image_data(config: Dict, batch_size: int) -> List[str]: + path_to_data = config["data_folder"] + recursive_search = config["recursive_search"] + return get_files_with_endings( + path_to_data, batch_size, recursive_search, [".jpg", ".jpeg", ".gif"] + ) + + +def generate_text_data(config: Dict, batch_size: int, logger: object) -> List[str]: + input_data = [] + if "gen_sequence_length" in config: + string_length = config["gen_sequence_length"] + else: + string_length = DEFAULT_STRING_LENGTH + logger.warning("Using default string length {}".format(string_length)) + for _ in range(batch_size): + rand_sentence = generate_sentence(string_length) + input_data.append(rand_sentence) + + return input_data + + +def load_text_data(config: Dict, batch_size: int, logger: object) -> List[str]: + path_to_data = config["data_folder"] + recursive_search = config["recursive_search"] + input_files = get_files_with_endings( + path_to_data, batch_size, recursive_search, [".txt"] + ) + if 
"max_string_length" in config: + max_string_length = config["max_string_length"] + else: + max_string_length = -1 + logger.warning("Using default max string length {}".format(max_string_length)) + input_data = [] + for f_path in input_files: + f = open(f_path) + text_data = f.read() + f.close() + input_data.append(text_data[:max_string_length]) + return input_data + + +def generate_question_data(config: Dict, logger: object) -> Tuple[str, str]: + if "gen_sequence_length" in config: + string_length = config["gen_sequence_length"] + else: + string_length = DEFAULT_STRING_LENGTH + logger.warning("Using default string length {}".format(string_length)) + question = generate_sentence(string_length) + context = generate_sentence(string_length) + return (question, context) + + +def load_question_data(config: Dict) -> Tuple[str, str]: + path_to_questions = config["question_file"] + path_to_context = config["context_file"] + + f_question = open(path_to_questions) + f_context = open(path_to_context) + question = f_question.read() + context = f_context.read() + f_question.close() + f_context.close() + return question, context diff --git a/src/deepsparse/benchmark/helpers.py b/src/deepsparse/benchmark/helpers.py index d0ccb95295..50d3bced2b 100644 --- a/src/deepsparse/benchmark/helpers.py +++ b/src/deepsparse/benchmark/helpers.py @@ -12,16 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json import os +from typing import Dict from deepsparse import Scheduler +DEFAULT_STRING_LENGTH = 50 +DEFAULT_IMAGE_SHAPE = (240, 240, 3) + __all__ = [ "decide_thread_pinning", "parse_scheduler", "parse_scenario", "parse_num_streams", + "parse_input_config", ] @@ -95,3 +101,10 @@ def parse_num_streams(num_streams: int, num_cores: int, scenario: str, logger: o ) ) return default_num_streams + + +def parse_input_config(input_config_file: str) -> Dict[str, any]: + config_file = open(input_config_file) + config = json.load(config_file) + config_file.close() + return config From 9202a6fdc46c6aec38abd925fe4b893b9b8ff023 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Fri, 28 Jul 2023 15:59:03 -0400 Subject: [PATCH 13/37] adding unit tests --- setup.py | 1 + src/deepsparse/benchmark/README.md | 12 +- src/deepsparse/benchmark/benchmark_model.py | 6 +- .../benchmark/benchmark_pipeline.py | 111 ++++++++++--- src/deepsparse/benchmark/data_creation.py | 54 +++--- src/deepsparse/benchmark/helpers.py | 23 +-- tests/test_pipeline_benchmark.py | 154 ++++++++++++++++++ 7 files changed, 300 insertions(+), 61 deletions(-) create mode 100644 tests/test_pipeline_benchmark.py diff --git a/setup.py b/setup.py index 8e425bd816..84289d3f6b 100644 --- a/setup.py +++ b/setup.py @@ -292,6 +292,7 @@ def _setup_entry_points() -> Dict: "deepsparse.analyze=deepsparse.analyze:main", "deepsparse.check_hardware=deepsparse.cpu:print_hardware_capability", "deepsparse.benchmark=deepsparse.benchmark.benchmark_model:main", + "deepsparse.benchmark_pipeline=deepsparse.benchmark.benchmark_pipeline:main", # noqa E501 "deepsparse.benchmark_sweep=deepsparse.benchmark.benchmark_sweep:main", "deepsparse.server=deepsparse.server.cli:main", "deepsparse.object_detection.annotate=deepsparse.yolo.annotate:main", diff --git a/src/deepsparse/benchmark/README.md b/src/deepsparse/benchmark/README.md index f530c0255b..7912abe18c 100644 --- a/src/deepsparse/benchmark/README.md +++ b/src/deepsparse/benchmark/README.md @@ -189,10 +189,10 @@ Iterations: 622 ``` ## 📜 Benchmarking Pipelines -Expanding on the 
model benchmarking script, the pipeline benchmarker is a tool for benchmarking end-to-end inference, including pre and post processing. The script can generate fake input data based on the pipeline's input schema, or load it from a local folder. The pipeline then runs pre-processing, engine inference and post-processing. Benchmarking results are reported by section, useful for identifying bottlenecks. +Expanding on the model benchmarking script, `deepsparse.benchmark_pipeline` is a tool for benchmarking end-to-end inference, including pre and post processing. The script can generate fake input data based on the pipeline's input schema, or load it from a local folder. The pipeline then runs pre-processing, engine inference and post-processing. Benchmarking results are reported by section, useful for identifying bottlenecks. ### Usage -Input arguments are the same as the Engine benchmarker, but with two addtions: +Input arguments are the same as the Engine benchmarker, but with two additions: ``` positional arguments: @@ -258,12 +258,12 @@ Additional arguments to the pipeline or input_schema can be added to the `pipeli ### Example Usage -Running image classification for 30 seconds with a batch size of 32: +Running ResNet image classification for 30 seconds with a batch size of 32: ``` -python benchmark_pipeline.py image_classification zoo:cv/classification/resnet_v1-50_2x/pytorch/sparseml/imagenet/base-none -c config.json -t 60 -b 32 +deepsparse.benchmark_pipeline image_classification zoo:cv/classification/resnet_v1-50_2x/pytorch/sparseml/imagenet/base-none -c config.json -t 60 -b 32 ``` -Running text generation for 30 seconds asynchronously +Running CodeGen text generation for 30 seconds asynchronously ``` -python benchmark_pipeline.py text_generation image_classification zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/pruned50-none -c config.json -t 30 -s async +deepsparse.benchmark_pipeline text_generation zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/pruned50-none -c config.json -t 30 -s async ``` \ No newline at end of file diff --git a/src/deepsparse/benchmark/benchmark_model.py b/src/deepsparse/benchmark/benchmark_model.py index 04fcdb8c7a..8c978ce87c 100644 --- a/src/deepsparse/benchmark/benchmark_model.py +++ b/src/deepsparse/benchmark/benchmark_model.py @@ -282,15 +282,15 @@ def benchmark_model( if num_cores is None: num_cores = cpu_architecture().num_available_physical_cores - decide_thread_pinning(thread_pinning, _LOGGER) + decide_thread_pinning(thread_pinning) - scenario = parse_scenario(scenario.lower(), _LOGGER) + scenario = parse_scenario(scenario.lower()) scheduler = parse_scheduler(scenario) input_shapes = parse_input_shapes(input_shapes) orig_model_path = model_path model_path = model_to_path(model_path) - num_streams = parse_num_streams(num_streams, num_cores, scenario, _LOGGER) + num_streams = parse_num_streams(num_streams, num_cores, scenario) # Compile the ONNX into a runnable model if engine == DEEPSPARSE_ENGINE: diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 1ff586253b..9cb8a9ed25 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -12,6 +12,75 @@ # See the License for the specific language governing permissions and # limitations under the License. 
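The `-c config.json` argument shown in the README examples points at a JSON input configuration. As a rough sketch, such a file could be written out as below; the field names match the test config added later in this series, while the concrete values and the `/path/to/data` folder are illustrative only.

```
import json

# Illustrative input config for deepsparse.benchmark_pipeline; values are examples only.
config = {
    "data_type": "dummy",                # "dummy" generates inputs, "real" loads them from disk
    "gen_sequence_length": 100,          # length of generated text inputs
    "input_image_shape": [500, 500, 3],  # shape of generated image inputs
    "data_folder": "/path/to/data",      # only consulted when data_type is "real"
    "recursive_search": True,
    "max_string_length": -1,
    "pipeline_kwargs": {},
    "input_schema_kwargs": {},
}

with open("config.json", "w") as f:
    json.dump(config, f, indent=4)
```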
+""" +Benchmark DeepSparse Pipelines + +########## +Command help: +usage: deepsparse.benchmark_pipeline [-h] [-c INPUT_CONFIG] [-b BATCH_SIZE] + [-ncores NUM_CORES] [-s {async,sync,elastic}] + [-t TIME] [-w WARMUP_TIME] [-nstreams NUM_STREAMS] + [-pin {none,core,numa}] [-e ENGINE] + [-q] [-x EXPORT_PATH] task_name model_path + +positional arguments: + task_name Type of pipeline to run + model_path Path to an ONNX model file or SparseZoo model stub + +optional arguments: + -h, --help show this help message and exit + -c INPUT_CONFIG, --input_config INPUT_CONFIG + JSON file containing schema for input data + -b BATCH_SIZE, --batch_size BATCH_SIZE + The batch size to run the analysis for. Must be greater than 0 + -ncores NUM_CORES, --num_cores NUM_CORES + The number of physical cores to run the analysis on, + defaults to all physical cores available on the system. + -s {async,sync,elastic}, --scenario {async,sync,elastic} + Choose between using the async, sync and elastic + scenarios. Sync and async are similar to the single- + stream/multi-stream scenarios. Elastic is a newer + scenario that behaves similarly to the async scenario + but uses a different scheduling backend. Default value + is sync. + -t TIME, --time TIME The number of seconds the benchmark will run. Default + is 10 seconds. + -w WARMUP_TIME, --warmup_time WARMUP_TIME + The number of seconds the benchmark will warmup before + running.Default is 2 seconds. + -nstreams NUM_STREAMS, --num_streams NUM_STREAMS + The number of streams that will submit inferences in + parallel using async scenario. Default is + automatically determined for given hardware and may be + sub-optimal. + -pin {none,core,numa}, --thread_pinning {none,core,numa} + Enable binding threads to cores ('core' the default), + threads to cores on sockets ('numa'), or disable + ('none'). + -e {deepsparse,onnxruntime}, --engine {deepsparse,onnxruntime} + Inference engine backend to run eval on. Choices are + 'deepsparse', 'onnxruntime'. Default is 'deepsparse'. + -q, --quiet Lower logging verbosity. + -x EXPORT_PATH, --export_path EXPORT_PATH + Store results into a JSON file. 
+ +########## +Example ResNet image classification for 30 seconds with a batch size of 32: +``` +deepsparse.benchmark_pipeline \ + image_classification \ + zoo:cv/classification/resnet_v1-50_2x/pytorch/sparseml/imagenet/base-none \ + -c config.json -t 60 -b 32 + +########## +Example CodeGen text generation for 30 seconds asynchronously +deepsparse.benchmark_pipeline \ + text_generation \ + zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/ + bigpython_bigquery_thepile/pruned50-none \ + -c config.json -t 30 -s async +""" + import argparse import json import logging @@ -24,6 +93,7 @@ from deepsparse import Pipeline, __version__ from deepsparse.benchmark.data_creation import ( + SchemaType, generate_image_data, generate_question_data, generate_text_data, @@ -52,6 +122,9 @@ DEEPSPARSE_ENGINE = "deepsparse" ORT_ENGINE = "onnxruntime" +DUMMY_INPUT_TYPE = "dummy" +REAL_INPUT_TYPE = "real" + def parse_args(): parser = argparse.ArgumentParser(description="Benchmark DeepSparse Pipelines") @@ -230,33 +303,33 @@ def create_input_schema( if "input_schema_kwargs" in config: kwargs = config["input_schema_kwargs"] - if input_type == "dummy": - if input_schema_requirement == "image": - input_data = generate_image_data(config, batch_size, _LOGGER) + if input_type == DUMMY_INPUT_TYPE: + if input_schema_requirement == SchemaType.IMAGE: + input_data = generate_image_data(config, batch_size) inputs = pipeline.input_schema(images=input_data, **kwargs) - elif input_schema_requirement == "text_sequence": - input_data = generate_text_data(config, batch_size, _LOGGER) + elif input_schema_requirement == SchemaType.TEXT_SEQ: + input_data = generate_text_data(config, batch_size) inputs = pipeline.input_schema(sequences=input_data, **kwargs) - elif input_schema_requirement == "text_inputs": - input_data = generate_text_data(config, batch_size, _LOGGER) + elif input_schema_requirement == SchemaType.TEXT_INPUT: + input_data = generate_text_data(config, batch_size) inputs = pipeline.input_schema(inputs=input_data, **kwargs) - elif input_schema_requirement == "question": + elif input_schema_requirement == SchemaType.QUESTION: _LOGGER.warn( "Only batch size of 1 supported for Question Answering Pipeline" ) - question, context = generate_question_data(config, _LOGGER) + question, context = generate_question_data(config) inputs = pipeline.input_schema(question=question, context=context, **kwargs) - elif input_type == "real": - if input_schema_requirement == "image": + elif input_type == REAL_INPUT_TYPE: + if input_schema_requirement == SchemaType.IMAGE: input_data = load_image_data(config, batch_size) inputs = pipeline.input_schema(images=input_data, **kwargs) - elif input_schema_requirement == "text_sequence": - input_data = load_text_data(config, _LOGGER) + elif input_schema_requirement == SchemaType.TEXT_SEQ: + input_data = load_text_data(config) inputs = pipeline.input_schema(sequences=input_data, **kwargs) - elif input_schema_requirement == "text_inputs": - input_data = load_text_data(config, batch_size, _LOGGER) + elif input_schema_requirement == SchemaType.TEXT_INPUT: + input_data = load_text_data(config, batch_size) inputs = pipeline.input_schema(inputs=input_data, **kwargs) - elif input_schema_requirement == "question": + elif input_schema_requirement == SchemaType.QUESTION: _LOGGER.warn( "Only batch size of 1 supported for Question Answering Pipeline" ) @@ -289,10 +362,10 @@ def benchmark_pipeline( if num_cores is None: num_cores = cpu_architecture().num_available_physical_cores - 
decide_thread_pinning(thread_pinning, _LOGGER) - scenario = parse_scenario(scenario.lower(), _LOGGER) + decide_thread_pinning(thread_pinning) + scenario = parse_scenario(scenario.lower()) scheduler = parse_scheduler(scenario) - num_streams = parse_num_streams(num_streams, num_cores, scenario, _LOGGER) + num_streams = parse_num_streams(num_streams, num_cores, scenario) input_type = config["data_type"] kwargs = {} diff --git a/src/deepsparse/benchmark/data_creation.py b/src/deepsparse/benchmark/data_creation.py index 6b1036e98e..502a6ca805 100644 --- a/src/deepsparse/benchmark/data_creation.py +++ b/src/deepsparse/benchmark/data_creation.py @@ -13,6 +13,7 @@ # limitations under the License. import glob +import logging import random import string from typing import Dict, List, Tuple @@ -22,6 +23,8 @@ from deepsparse import Pipeline +_LOGGER = logging.getLogger(__name__) + DEFAULT_STRING_LENGTH = 50 DEFAULT_IMAGE_SHAPE = (240, 240, 3) @@ -38,26 +41,33 @@ ] +class SchemaType: + IMAGE: str = "images" + TEXT_SEQ: str = "sequences" + TEXT_INPUT: str = "inputs" + QUESTION: str = "question" + + def get_input_schema_type(pipeline: Pipeline) -> str: input_schema_requirements = list(pipeline.input_schema.__fields__.keys()) input_schema_fields = pipeline.input_schema.__fields__ - if "images" in input_schema_requirements: - return "image" - if "sequences" in input_schema_requirements: + if SchemaType.IMAGE in input_schema_requirements: + return SchemaType.IMAGE + if SchemaType.TEXT_SEQ in input_schema_requirements: sequence_types = [ - f.outer_type_ for f in input_schema_fields["sequences"].sub_fields + f.outer_type_ for f in input_schema_fields[SchemaType.TEXT_SEQ].sub_fields ] if List[str] in sequence_types: - return "text_sequence" - elif "inputs" in input_schema_requirements: + return SchemaType.TEXT_SEQ + elif SchemaType.TEXT_INPUT in input_schema_requirements: sequence_types = [ - f.outer_type_ for f in input_schema_fields["inputs"].sub_fields + f.outer_type_ for f in input_schema_fields[SchemaType.TEXT_INPUT].sub_fields ] if List[str] in sequence_types: - return "text_inputs" - elif "question" in input_schema_requirements: - return "question" + return SchemaType.TEXT_INPUT + elif SchemaType.QUESTION in input_schema_requirements: + return SchemaType.QUESTION raise Exception("Unknown schema requirement {}".format(input_schema_requirements)) @@ -85,15 +95,13 @@ def generate_sentence(string_length: int, avg_word_length: int = 5): return "".join(random_chars) -def generate_image_data( - config: Dict, batch_size: int, logger: object -) -> List[numpy.ndarray]: +def generate_image_data(config: Dict, batch_size: int) -> List[numpy.ndarray]: input_data = [] if "input_image_shape" in config and len(config["input_image_shape"]) == 3: image_shape = config["input_image_shape"] else: image_shape = DEFAULT_IMAGE_SHAPE - logger.warning("Using default image shape {}".format(image_shape)) + _LOGGER.warning("Using default image shape {}".format(image_shape)) for _ in range(batch_size): rand_array = numpy.random.randint(0, high=255, size=image_shape).astype( @@ -112,21 +120,21 @@ def load_image_data(config: Dict, batch_size: int) -> List[str]: ) -def generate_text_data(config: Dict, batch_size: int, logger: object) -> List[str]: +def generate_text_data(config: Dict, batch_size: int, avg_word_len=5) -> List[str]: input_data = [] if "gen_sequence_length" in config: string_length = config["gen_sequence_length"] else: string_length = DEFAULT_STRING_LENGTH - logger.warning("Using default string length 
{}".format(string_length)) + _LOGGER.warning("Using default string length {}".format(string_length)) for _ in range(batch_size): - rand_sentence = generate_sentence(string_length) + rand_sentence = generate_sentence(string_length, avg_word_length=avg_word_len) input_data.append(rand_sentence) return input_data -def load_text_data(config: Dict, batch_size: int, logger: object) -> List[str]: +def load_text_data(config: Dict, batch_size: int) -> List[str]: path_to_data = config["data_folder"] recursive_search = config["recursive_search"] input_files = get_files_with_endings( @@ -136,7 +144,7 @@ def load_text_data(config: Dict, batch_size: int, logger: object) -> List[str]: max_string_length = config["max_string_length"] else: max_string_length = -1 - logger.warning("Using default max string length {}".format(max_string_length)) + _LOGGER.warning("Using default max string length {}".format(max_string_length)) input_data = [] for f_path in input_files: f = open(f_path) @@ -146,14 +154,14 @@ def load_text_data(config: Dict, batch_size: int, logger: object) -> List[str]: return input_data -def generate_question_data(config: Dict, logger: object) -> Tuple[str, str]: +def generate_question_data(config: Dict, avg_word_len=5) -> Tuple[str, str]: if "gen_sequence_length" in config: string_length = config["gen_sequence_length"] else: string_length = DEFAULT_STRING_LENGTH - logger.warning("Using default string length {}".format(string_length)) - question = generate_sentence(string_length) - context = generate_sentence(string_length) + _LOGGER.warning("Using default string length {}".format(string_length)) + question = generate_sentence(string_length, avg_word_length=avg_word_len) + context = generate_sentence(string_length, avg_word_length=avg_word_len) return (question, context) diff --git a/src/deepsparse/benchmark/helpers.py b/src/deepsparse/benchmark/helpers.py index 50d3bced2b..6702d269d7 100644 --- a/src/deepsparse/benchmark/helpers.py +++ b/src/deepsparse/benchmark/helpers.py @@ -13,12 +13,15 @@ # limitations under the License. 
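A rough usage sketch of the dummy-data helpers above, assuming the import path from this diff (`deepsparse.benchmark.data_creation`); the shape and length values are arbitrary examples, not defaults.

```
from deepsparse.benchmark.data_creation import (
    generate_image_data,
    generate_sentence,
    generate_text_data,
)

# Example config; both keys are optional and fall back to the module defaults.
config = {"gen_sequence_length": 128, "input_image_shape": [224, 224, 3]}

images = generate_image_data(config, batch_size=4)    # 4 random uint8 arrays of 224x224x3
sentences = generate_text_data(config, batch_size=4)  # 4 random strings of length 128
print(images[0].shape, len(sentences[0]))
print(generate_sentence(string_length=50, avg_word_length=5))
```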
import json +import logging import os from typing import Dict from deepsparse import Scheduler +_LOGGER = logging.getLogger(__name__) + DEFAULT_STRING_LENGTH = 50 DEFAULT_IMAGE_SHAPE = (240, 240, 3) @@ -31,21 +34,21 @@ ] -def decide_thread_pinning(pinning_mode: str, logger: object) -> None: +def decide_thread_pinning(pinning_mode: str) -> None: pinning_mode = pinning_mode.lower() if pinning_mode in "core": os.environ["NM_BIND_THREADS_TO_CORES"] = "1" - logger.info("Thread pinning to cores enabled") + _LOGGER.info("Thread pinning to cores enabled") elif pinning_mode in "numa": os.environ["NM_BIND_THREADS_TO_CORES"] = "0" os.environ["NM_BIND_THREADS_TO_SOCKETS"] = "1" - logger.info("Thread pinning to socket/numa nodes enabled") + _LOGGER.info("Thread pinning to socket/numa nodes enabled") elif pinning_mode in "none": os.environ["NM_BIND_THREADS_TO_CORES"] = "0" os.environ["NM_BIND_THREADS_TO_SOCKETS"] = "0" - logger.info("Thread pinning disabled, performance may be sub-optimal") + _LOGGER.info("Thread pinning disabled, performance may be sub-optimal") else: - logger.info( + _LOGGER.info( "Recieved invalid option for thread_pinning '{}', skipping".format( pinning_mode ) @@ -64,7 +67,7 @@ def parse_scheduler(scenario: str) -> Scheduler: return Scheduler.multi_stream -def parse_scenario(scenario: str, logger: object) -> str: +def parse_scenario(scenario: str) -> str: scenario = scenario.lower() if scenario == "async": return "multistream" @@ -73,7 +76,7 @@ def parse_scenario(scenario: str, logger: object) -> str: elif scenario == "elastic": return "elastic" else: - logger.info( + _LOGGER.info( "Recieved invalid option for scenario'{}', defaulting to async".format( scenario ) @@ -81,20 +84,20 @@ def parse_scenario(scenario: str, logger: object) -> str: return "multistream" -def parse_num_streams(num_streams: int, num_cores: int, scenario: str, logger: object): +def parse_num_streams(num_streams: int, num_cores: int, scenario: str): # If model.num_streams is set, and the scenario is either "multi_stream" or # "elastic", use the value of num_streams given to us by the model, otherwise # use a semi-sane default value. if scenario == "sync" or scenario == "singlestream": if num_streams and num_streams > 1: - logger.info("num_streams reduced to 1 for singlestream scenario.") + _LOGGER.info("num_streams reduced to 1 for singlestream scenario.") return 1 else: if num_streams: return num_streams else: default_num_streams = max(1, int(num_cores / 2)) - logger.info( + _LOGGER.info( "num_streams default value chosen of {}. " "This requires tuning and may be sub-optimal".format( default_num_streams diff --git a/tests/test_pipeline_benchmark.py b/tests/test_pipeline_benchmark.py new file mode 100644 index 0000000000..f4b19f4edf --- /dev/null +++ b/tests/test_pipeline_benchmark.py @@ -0,0 +1,154 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
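A quick illustration of the scenario/stream helpers above, assuming the `deepsparse.benchmark.helpers` import path from this diff; the core count of 8 is an arbitrary example.

```
from deepsparse.benchmark.helpers import parse_num_streams, parse_scenario

scenario = parse_scenario("async")                  # normalizes to "multistream"
num_streams = parse_num_streams(None, 8, scenario)  # no explicit value -> max(1, 8 // 2) == 4
print(scenario, num_streams)
```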
+ +import math +from typing import List + +import numpy + +import pytest +from deepsparse import Pipeline +from deepsparse.benchmark.benchmark_pipeline import calculate_section_stats +from deepsparse.benchmark.data_creation import ( + SchemaType, + generate_image_data, + generate_question_data, + generate_text_data, + get_input_schema_type, +) +from deepsparse.utils import StagedTimer +from tests.helpers import run_command + + +@pytest.mark.parametrize( + ("pipeline_id", "model_stub", "additional_opts"), + [ + ( + "text_classification", + "zoo:nlp/sentiment_analysis/distilbert-none/pytorch/huggingface/" + "sst2/pruned90-none", + ["-c", "tests/test_data/pipeline_bench_config.json", "-b", "4"], + ), + ( + "image_classification", + "zoo:cv/classification/resnet_v1-50_2x/pytorch/sparseml/imagenet/base-none", + ["-c", "tests/test_data/pipeline_bench_config.json", "-s", "async"], + ), + ( + "question_answering", + "zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/" + "12layer_pruned80_quant-none-vnni", + [ + "-c", + "tests/test_data/pipeline_bench_config.json", + "-t", + "5", + "-s", + "elastic", + ], + ), + ( + "token_classification", + "zoo:nlp/token_classification/distilbert-none/pytorch/huggingface/" + "conll2003/pruned90-none", + ["-c", "tests/test_data/pipeline_bench_config.json", "-t", "3"], + ), + ], +) +def test_pipeline_benchmark( + pipeline_id: str, model_stub: str, additional_opts: List[str] +): + cmd = [ + "deepsparse.benchmark_pipeline", + pipeline_id, + model_stub, + *additional_opts, + ] + print(f"\n==== test_benchmark command ====\n{' '.join(cmd)}") + res = run_command(cmd) + if res.stdout is not None: + print(f"\n==== test_benchmark output ====\n{res.stdout}") + assert res.returncode == 0 + assert "error" not in res.stdout.lower() + assert "fail" not in res.stdout.lower() + + +def test_generate_image_data(): + batch_size = 32 + config = {"input_image_shape": (600, 600, 1)} + image_data = generate_image_data(config, batch_size) + assert len(image_data) == batch_size + img = image_data[0] + assert img.shape == (600, 600, 1) + assert img.dtype == numpy.uint8 + assert numpy.max(img) < 255 and numpy.min(img) >= 0 + + +def test_generate_text_data(): + batch_size = 16 + avg_word_len = 8 + config = {"gen_sequence_length": 250} + text_data = generate_text_data(config, batch_size, avg_word_len=avg_word_len) + assert len(text_data) == batch_size + text = text_data[0] + assert len(text) == 250 + num_spaces = text.count(" ") + assert num_spaces == int(len(text) / avg_word_len) + + +def test_generate_question_data(): + avg_word_len = 10 + config = {"gen_sequence_length": 50} + question, context = generate_question_data(config, avg_word_len=avg_word_len) + assert len(question) == config["gen_sequence_length"] + assert len(context) == config["gen_sequence_length"] + num_q_spaces = question.count(" ") + num_c_spaces = context.count(" ") + assert num_q_spaces == num_c_spaces == int(len(question) / avg_word_len) + + +@pytest.mark.parametrize( + ("task_name", "input_schema"), + [ + ("yolo", SchemaType.IMAGE), + ("text_classification", SchemaType.TEXT_SEQ), + ("transformers_embedding_extraction", SchemaType.TEXT_INPUT), + ("question_answering", SchemaType.QUESTION), + ], +) +def test_get_input_schema_type(task_name, input_schema): + pipeline = Pipeline.create(task=task_name) + assert get_input_schema_type(pipeline) == input_schema + + +def test_calculations(): + batch_times = [] + for i in range(5): + timer = StagedTimer() + timer._staged_start_times["stage_1"] = [i + 0.1] + 
timer._staged_stop_times["stage_1"] = [i + 0.5] + + timer._staged_start_times["stage_2"] = [i + 0.6] + timer._staged_stop_times["stage_2"] = [i + 0.9] + + batch_times.append(timer) + + total_run_time = 6.0 + section_stats = calculate_section_stats(batch_times, total_run_time) + assert math.isclose( + section_stats["stage_1"]["total_percentage"], 33.33, rel_tol=0.05 + ) + assert math.isclose(section_stats["stage_2"]["total_percentage"], 25, rel_tol=0.05) + assert math.isclose(section_stats["stage_1"]["mean"], 400, rel_tol=0.05) + assert math.isclose(section_stats["stage_2"]["median"], 300, rel_tol=0.05) From 2ed018574c83fe68caffd8cd34c13446623afb93 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Fri, 28 Jul 2023 16:22:10 -0400 Subject: [PATCH 14/37] adding missing test file --- tests/test_data/pipeline_bench_config.json | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 tests/test_data/pipeline_bench_config.json diff --git a/tests/test_data/pipeline_bench_config.json b/tests/test_data/pipeline_bench_config.json new file mode 100644 index 0000000000..5886762cea --- /dev/null +++ b/tests/test_data/pipeline_bench_config.json @@ -0,0 +1,10 @@ +{ + "data_type": "dummy", + "gen_sequence_length": 100, + "input_image_shape": [500,500,3], + "data_folder": "/home/sadkins/imagenette2-320/", + "recursive_search": true, + "max_string_length": -1, + "pipeline_kwargs": {}, + "input_schema_kwargs": {} +} \ No newline at end of file From 729447e57652a3a81780f1511b3bd2b56c3efe5f Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Mon, 31 Jul 2023 10:58:21 -0400 Subject: [PATCH 15/37] skipping test w/high memory usage --- tests/test_pipeline_benchmark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_pipeline_benchmark.py b/tests/test_pipeline_benchmark.py index f4b19f4edf..cabee84196 100644 --- a/tests/test_pipeline_benchmark.py +++ b/tests/test_pipeline_benchmark.py @@ -31,6 +31,7 @@ from tests.helpers import run_command +@pytest.mark.skip(reason="High memory usage, causes GitHub test run to be killed") @pytest.mark.parametrize( ("pipeline_id", "model_stub", "additional_opts"), [ From abb4811fd8dcfb159b7778fed3cb56447153e811 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Mon, 31 Jul 2023 11:06:13 -0400 Subject: [PATCH 16/37] skip test with high memory usage --- tests/test_pipeline_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pipeline_benchmark.py b/tests/test_pipeline_benchmark.py index cabee84196..06235d789d 100644 --- a/tests/test_pipeline_benchmark.py +++ b/tests/test_pipeline_benchmark.py @@ -31,7 +31,6 @@ from tests.helpers import run_command -@pytest.mark.skip(reason="High memory usage, causes GitHub test run to be killed") @pytest.mark.parametrize( ("pipeline_id", "model_stub", "additional_opts"), [ @@ -67,6 +66,7 @@ ), ], ) +@pytest.mark.skip(reason="High memory usage, causes GitHub test run to be killed") def test_pipeline_benchmark( pipeline_id: str, model_stub: str, additional_opts: List[str] ): From 8cdbe9bbbe29248cc6058e910c6bc2c78b8f92fd Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Mon, 31 Jul 2023 11:31:09 -0400 Subject: [PATCH 17/37] unit test memory --- tests/test_pipeline_benchmark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_pipeline_benchmark.py b/tests/test_pipeline_benchmark.py index 06235d789d..75a6bc1960 100644 --- a/tests/test_pipeline_benchmark.py +++ b/tests/test_pipeline_benchmark.py @@ -128,6 +128,7 @@ def test_generate_question_data(): ("question_answering", SchemaType.QUESTION), 
], ) +@pytest.mark.skip(reason="High memory usage, causes GitHub test run to be killed") def test_get_input_schema_type(task_name, input_schema): pipeline = Pipeline.create(task=task_name) assert get_input_schema_type(pipeline) == input_schema From 1058f0b6aa33fe7a0fcba2d6d7d70013407e35b4 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Mon, 31 Jul 2023 11:56:39 -0400 Subject: [PATCH 18/37] add tests back in --- tests/test_pipeline_benchmark.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_pipeline_benchmark.py b/tests/test_pipeline_benchmark.py index 75a6bc1960..ea749027e6 100644 --- a/tests/test_pipeline_benchmark.py +++ b/tests/test_pipeline_benchmark.py @@ -66,7 +66,6 @@ ), ], ) -@pytest.mark.skip(reason="High memory usage, causes GitHub test run to be killed") def test_pipeline_benchmark( pipeline_id: str, model_stub: str, additional_opts: List[str] ): From 249e6452484ebf552037f261f4abd1b9aa237775 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Mon, 31 Jul 2023 11:59:24 -0400 Subject: [PATCH 19/37] add tests back in --- tests/test_pipeline_benchmark.py | 36 ++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/tests/test_pipeline_benchmark.py b/tests/test_pipeline_benchmark.py index ea749027e6..aadf3afb71 100644 --- a/tests/test_pipeline_benchmark.py +++ b/tests/test_pipeline_benchmark.py @@ -38,31 +38,45 @@ "text_classification", "zoo:nlp/sentiment_analysis/distilbert-none/pytorch/huggingface/" "sst2/pruned90-none", - ["-c", "tests/test_data/pipeline_bench_config.json", "-b", "4"], + [ + "-c", + "tests/test_data/pipeline_bench_config.json", + "-b", + "4", + "-t", + "3", + "-w", + "0.5", + ], ), ( "image_classification", "zoo:cv/classification/resnet_v1-50_2x/pytorch/sparseml/imagenet/base-none", - ["-c", "tests/test_data/pipeline_bench_config.json", "-s", "async"], - ), - ( - "question_answering", - "zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/" - "12layer_pruned80_quant-none-vnni", [ "-c", "tests/test_data/pipeline_bench_config.json", - "-t", - "5", "-s", - "elastic", + "async", + "-t", + "3", + "-w", + "0.5", ], ), ( "token_classification", "zoo:nlp/token_classification/distilbert-none/pytorch/huggingface/" "conll2003/pruned90-none", - ["-c", "tests/test_data/pipeline_bench_config.json", "-t", "3"], + [ + "-c", + "tests/test_data/pipeline_bench_config.json", + "-s", + "elastic", + "-t", + "3", + "-w", + "0.5", + ], ), ], ) From ba8688b4be776a1783cb08e883de1cf5b6ef67e6 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Mon, 31 Jul 2023 12:50:11 -0400 Subject: [PATCH 20/37] fix async percentages --- src/deepsparse/benchmark/benchmark_pipeline.py | 18 +++++++++++------- tests/test_pipeline_benchmark.py | 8 ++++---- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 9cb8a9ed25..0f6b554df2 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -407,10 +407,12 @@ def benchmark_pipeline( "Generated no batch timings, try extending benchmark time with '--time'" ) - return batch_times, total_run_time + return batch_times, total_run_time, num_streams -def calculate_statistics(batch_times_ms: List[float], total_run_time_ms: float) -> Dict: +def calculate_statistics( + batch_times_ms: List[float], total_run_time_ms: float, num_streams: int +) -> Dict: percentiles = [25.0, 50.0, 75.0, 90.0, 95.0, 99.0, 99.9] buckets = numpy.percentile(batch_times_ms, 
percentiles).tolist() percentiles_dict = { @@ -418,7 +420,7 @@ def calculate_statistics(batch_times_ms: List[float], total_run_time_ms: float) } benchmark_dict = { - "total_percentage": sum(batch_times_ms) / total_run_time_ms * 100, + "total_percentage": sum(batch_times_ms) / total_run_time_ms * 100 * num_streams, "median": numpy.median(batch_times_ms), "mean": numpy.mean(batch_times_ms), "std": numpy.std(batch_times_ms), @@ -428,7 +430,7 @@ def calculate_statistics(batch_times_ms: List[float], total_run_time_ms: float) def calculate_section_stats( - batch_times: List[StagedTimer], total_run_time: float + batch_times: List[StagedTimer], total_run_time: float, num_streams: int ) -> Dict[str, Dict]: compute_sections = batch_times[0].stages total_run_time_ms = total_run_time * 1000 @@ -436,7 +438,9 @@ def calculate_section_stats( sections = {} for section in compute_sections: section_times = [st.times[section] * 1000 for st in batch_times] - sections[section] = calculate_statistics(section_times, total_run_time_ms) + sections[section] = calculate_statistics( + section_times, total_run_time_ms, num_streams + ) return sections @@ -450,7 +454,7 @@ def main(): print("Batch Size: {}".format(args.batch_size)) print("Scenario: {}".format(args.scenario)) - batch_times, total_run_time = benchmark_pipeline( + batch_times, total_run_time, num_streams = benchmark_pipeline( model_path=args.model_path, task=args.task_name, config=config, @@ -465,7 +469,7 @@ def main(): quiet=args.quiet, ) - section_stats = calculate_section_stats(batch_times, total_run_time) + section_stats = calculate_section_stats(batch_times, total_run_time, num_streams) items_per_sec = (len(batch_times) * args.batch_size) / total_run_time benchmark_results = { diff --git a/tests/test_pipeline_benchmark.py b/tests/test_pipeline_benchmark.py index aadf3afb71..698e50c927 100644 --- a/tests/test_pipeline_benchmark.py +++ b/tests/test_pipeline_benchmark.py @@ -46,7 +46,7 @@ "-t", "3", "-w", - "0.5", + "1", ], ), ( @@ -60,7 +60,7 @@ "-t", "3", "-w", - "0.5", + "1", ], ), ( @@ -75,7 +75,7 @@ "-t", "3", "-w", - "0.5", + "1", ], ), ], @@ -160,7 +160,7 @@ def test_calculations(): batch_times.append(timer) total_run_time = 6.0 - section_stats = calculate_section_stats(batch_times, total_run_time) + section_stats = calculate_section_stats(batch_times, total_run_time, 1) assert math.isclose( section_stats["stage_1"]["total_percentage"], 33.33, rel_tol=0.05 ) From ecf15590936db9f9928efedd7b2e779591f6255c Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Mon, 31 Jul 2023 14:51:23 -0400 Subject: [PATCH 21/37] fix new quality errors --- src/deepsparse/utils/data.py | 2 +- tests/deepsparse/pipelines/test_dynamic_batch_pipeline.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/deepsparse/utils/data.py b/src/deepsparse/utils/data.py index df08fb5e0c..a8d463f7b9 100644 --- a/src/deepsparse/utils/data.py +++ b/src/deepsparse/utils/data.py @@ -101,7 +101,7 @@ def verify_outputs( raise Exception( f"Output shapes don't match, {output.shape} != {gt_output.shape}" ) - if type(output) != type(gt_output): + if type(output) is not type(gt_output): raise Exception( f"Output types don't match, {type(output)} != {type(gt_output)}" ) diff --git a/tests/deepsparse/pipelines/test_dynamic_batch_pipeline.py b/tests/deepsparse/pipelines/test_dynamic_batch_pipeline.py index 1d08fd9832..77f451da81 100644 --- a/tests/deepsparse/pipelines/test_dynamic_batch_pipeline.py +++ b/tests/deepsparse/pipelines/test_dynamic_batch_pipeline.py @@ -35,7 +35,7 @@ def 
compare(expected, actual): - assert type(expected) == type(actual) + assert type(expected) is type(actual) if isinstance(expected, (list, float, numpy.ndarray)): expected_np = numpy.asarray(expected, dtype=float) From e0f6ab34142fb99d4189f5515713e4cf20e9d9c6 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Tue, 1 Aug 2023 12:08:38 -0400 Subject: [PATCH 22/37] pass num_streams, fix percentage calculation for async --- src/deepsparse/benchmark/benchmark_pipeline.py | 4 +++- src/deepsparse/pipeline.py | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 0f6b554df2..80956ff18f 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -377,6 +377,7 @@ def benchmark_pipeline( engine_type=engine, scheduler=scheduler, num_cores=num_cores, + num_streams=num_streams, **kwargs, ) inputs = create_input_schema(pipeline, input_type, batch_size, config) @@ -419,8 +420,9 @@ def calculate_statistics( "{:2.1f}%".format(key): value for key, value in zip(percentiles, buckets) } + scaled_runtime = total_run_time_ms * num_streams benchmark_dict = { - "total_percentage": sum(batch_times_ms) / total_run_time_ms * 100 * num_streams, + "total_percentage": sum(batch_times_ms) / scaled_runtime * 100, "median": numpy.median(batch_times_ms), "mean": numpy.mean(batch_times_ms), "std": numpy.std(batch_times_ms), diff --git a/src/deepsparse/pipeline.py b/src/deepsparse/pipeline.py index 8a88dacbca..f5ff41a894 100644 --- a/src/deepsparse/pipeline.py +++ b/src/deepsparse/pipeline.py @@ -121,6 +121,9 @@ class PipelineImplementation(Pipeline): dynamic batch mode (Pipeline will accept any batch size). Default is 1 :param num_cores: number of CPU cores to allocate for inference engine. None specifies all available cores. Default is None + :param num_streams: The max number of requests the model can handle + concurrently. None or 0 implies a scheduler-defined default value; + default None :param scheduler: (deepsparse only) kind of scheduler to execute with. Pass None for the default :param input_shapes: list of shapes to set ONNX the inputs to. 
Pass None @@ -146,6 +149,7 @@ def __init__( engine_type: str = DEEPSPARSE_ENGINE, batch_size: Optional[int] = 1, num_cores: int = None, + num_streams: int = None, scheduler: Scheduler = None, input_shapes: List[List[int]] = None, context: Optional[Context] = None, @@ -181,6 +185,7 @@ def __init__( batch_size=self._batch_size or 1, # bs=1 for dynamic batch num_cores=num_cores, input_shapes=input_shapes, + num_streams=num_streams, ) if engine_type.lower() == DEEPSPARSE_ENGINE: self._engine_args["scheduler"] = scheduler From 9473b79ac8ab4ebaa12d73e8f547c1193dfd24aa Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Tue, 1 Aug 2023 13:18:15 -0400 Subject: [PATCH 23/37] fix for file loading --- src/deepsparse/benchmark/data_creation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/deepsparse/benchmark/data_creation.py b/src/deepsparse/benchmark/data_creation.py index 502a6ca805..769cb73404 100644 --- a/src/deepsparse/benchmark/data_creation.py +++ b/src/deepsparse/benchmark/data_creation.py @@ -73,10 +73,10 @@ def get_input_schema_type(pipeline: Pipeline) -> str: def get_files_with_endings( - folder: str, num_files: int, recursive: bool, file_endings: List[str] + folder: str, num_files: int, recursive: bool, file_endings: Tuple[str] ) -> List[str]: files = [] - for f in glob.glob(folder + "/**", recursivere=recursive): + for f in glob.glob(folder + "/**", recursive=recursive): if f.lower().endswith(file_endings): files.append(f) if len(files) < num_files: @@ -116,7 +116,7 @@ def load_image_data(config: Dict, batch_size: int) -> List[str]: path_to_data = config["data_folder"] recursive_search = config["recursive_search"] return get_files_with_endings( - path_to_data, batch_size, recursive_search, [".jpg", ".jpeg", ".gif"] + path_to_data, batch_size, recursive_search, (".jpg", ".jpeg", ".gif") ) @@ -138,7 +138,7 @@ def load_text_data(config: Dict, batch_size: int) -> List[str]: path_to_data = config["data_folder"] recursive_search = config["recursive_search"] input_files = get_files_with_endings( - path_to_data, batch_size, recursive_search, [".txt"] + path_to_data, batch_size, recursive_search, (".txt") ) if "max_string_length" in config: max_string_length = config["max_string_length"] From cc8de6aefb9d9c01febfb53444a64c3337026d3e Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Tue, 1 Aug 2023 14:34:31 -0400 Subject: [PATCH 24/37] PR comments --- .../benchmark/benchmark_pipeline.py | 39 ++++++++++++++----- src/deepsparse/benchmark/helpers.py | 6 +-- src/deepsparse/pipeline.py | 3 +- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 80956ff18f..9db297b25b 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -122,8 +122,10 @@ DEEPSPARSE_ENGINE = "deepsparse" ORT_ENGINE = "onnxruntime" -DUMMY_INPUT_TYPE = "dummy" -REAL_INPUT_TYPE = "real" + +class PipelineInputType: + DUMMY: str = "dummy" + REAL: str = "real" def parse_args(): @@ -242,6 +244,13 @@ def parse_args(): class PipelineExecutorThread(threading.Thread): + """ + Run pipeline reoeatedly on inputs for max_time seconds, pushing the timer data to + the timer queue to store the runtime of each section of the pipeline. 
+ + For intended usage, see multistream_benchmark + """ + def __init__( self, pipeline: Pipeline, @@ -264,6 +273,10 @@ def run(self): def singlestream_benchmark( pipeline: Pipeline, inputs: List[any], seconds_to_run: float ) -> List[StagedTimer]: + """ + Run pipeline repeatedly on inputs for max_time seconds, storing the runtime of each + section of the pipeline in batch_timings + """ benchmark_end_time = time.perf_counter() + seconds_to_run batch_timings = [] while time.perf_counter() < benchmark_end_time: @@ -279,6 +292,10 @@ def multistream_benchmark( seconds_to_run: float, num_streams: int, ) -> List[StagedTimer]: + """ + Create num_streams threads, each of which calls PipelineExecutorThread.run() for + seconds_to_run seconds. Stores all timing info in a shared queue. + """ time_queue = queue.Queue() max_time = time.perf_counter() + seconds_to_run threads = [] @@ -287,7 +304,7 @@ def multistream_benchmark( threads.append(PipelineExecutorThread(pipeline, inputs, time_queue, max_time)) for thread in threads: - thread.start() + thread.start() # triggers PipelineExecutorThread.run() for thread in threads: thread.join() @@ -296,14 +313,14 @@ def multistream_benchmark( def create_input_schema( - pipeline: Pipeline, input_type: str, batch_size: int, config: Dict + pipeline: Pipeline, input_type: PipelineInputType, batch_size: int, config: Dict ) -> any: input_schema_requirement = get_input_schema_type(pipeline) kwargs = {} if "input_schema_kwargs" in config: kwargs = config["input_schema_kwargs"] - if input_type == DUMMY_INPUT_TYPE: + if input_type == PipelineInputType.DUMMY: if input_schema_requirement == SchemaType.IMAGE: input_data = generate_image_data(config, batch_size) inputs = pipeline.input_schema(images=input_data, **kwargs) @@ -319,7 +336,7 @@ def create_input_schema( ) question, context = generate_question_data(config) inputs = pipeline.input_schema(question=question, context=context, **kwargs) - elif input_type == REAL_INPUT_TYPE: + elif input_type == PipelineInputType.REAL: if input_schema_requirement == SchemaType.IMAGE: input_data = load_image_data(config, batch_size) inputs = pipeline.input_schema(images=input_data, **kwargs) @@ -367,6 +384,8 @@ def benchmark_pipeline( scheduler = parse_scheduler(scenario) num_streams = parse_num_streams(num_streams, num_cores, scenario) + if "data_type" not in config: + raise Exception("Data type(dummy or real) must be specified in config") input_type = config["data_type"] kwargs = {} if "pipeline_kwargs" in config: @@ -451,10 +470,10 @@ def main(): args = parse_args() config = parse_input_config(args.input_config) - print("Original Model Path: {}".format(args.model_path)) - print("Task: {}".format(args.task_name)) - print("Batch Size: {}".format(args.batch_size)) - print("Scenario: {}".format(args.scenario)) + _LOGGER.info("Original Model Path: {}".format(args.model_path)) + _LOGGER.info("Task: {}".format(args.task_name)) + _LOGGER.info("Batch Size: {}".format(args.batch_size)) + _LOGGER.info("Scenario: {}".format(args.scenario)) batch_times, total_run_time, num_streams = benchmark_pipeline( model_path=args.model_path, diff --git a/src/deepsparse/benchmark/helpers.py b/src/deepsparse/benchmark/helpers.py index 6702d269d7..675226710b 100644 --- a/src/deepsparse/benchmark/helpers.py +++ b/src/deepsparse/benchmark/helpers.py @@ -22,9 +22,6 @@ _LOGGER = logging.getLogger(__name__) -DEFAULT_STRING_LENGTH = 50 -DEFAULT_IMAGE_SHAPE = (240, 240, 3) - __all__ = [ "decide_thread_pinning", "parse_scheduler", @@ -33,6 +30,9 @@ "parse_input_config", ] 
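For orientation, the reworked benchmark could also be driven programmatically along these lines. This is only a sketch: at this point in the series the config is still a plain dict, the model stub is the ResNet one from the README example (fetching it requires network access), and the run-time/warmup arguments are assumed to keep sensible defaults.

```
from deepsparse.benchmark.benchmark_pipeline import (
    benchmark_pipeline,
    calculate_section_stats,
)

# Minimal dummy-data config; "data_type" is required here, other keys fall back to defaults.
config = {"data_type": "dummy", "input_image_shape": [224, 224, 3]}

# Run-time/warmup arguments are omitted and assumed to default reasonably.
batch_times, total_run_time, num_streams = benchmark_pipeline(
    model_path="zoo:cv/classification/resnet_v1-50_2x/pytorch/sparseml/imagenet/base-none",
    task="image_classification",
    config=config,
    batch_size=1,
)

section_stats = calculate_section_stats(batch_times, total_run_time, num_streams)
for section, stats in section_stats.items():
    print(
        "%s: %.2f%% of runtime, %.4f ms mean"
        % (section, stats["total_percentage"], stats["mean"])
    )
```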
+DEFAULT_STRING_LENGTH = 50 +DEFAULT_IMAGE_SHAPE = (240, 240, 3) + def decide_thread_pinning(pinning_mode: str) -> None: pinning_mode = pinning_mode.lower() diff --git a/src/deepsparse/pipeline.py b/src/deepsparse/pipeline.py index bddad32b55..483aa68410 100644 --- a/src/deepsparse/pipeline.py +++ b/src/deepsparse/pipeline.py @@ -187,10 +187,10 @@ def __init__( batch_size=self._batch_size or 1, # bs=1 for dynamic batch num_cores=num_cores, input_shapes=input_shapes, - num_streams=num_streams, ) if engine_type.lower() == DEEPSPARSE_ENGINE: self._engine_args["scheduler"] = scheduler + self._engine_args["num_streams"] = num_streams self.onnx_file_path = self.setup_onnx_file_path() @@ -711,6 +711,7 @@ def create_engine( if context is not None and isinstance(context, Context): engine_args.pop("num_cores", None) engine_args.pop("scheduler", None) + engine_args.pop("num_streams", None) engine_args["context"] = context return MultiModelEngine( model=onnx_file_path, From b5ec9ae1dce596001a5b4931f52fd5a853323e84 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Tue, 1 Aug 2023 15:00:58 -0400 Subject: [PATCH 25/37] PR comments --- .../benchmark/benchmark_pipeline.py | 31 ++++++++-------- src/deepsparse/benchmark/data_creation.py | 35 +++++++++---------- src/deepsparse/benchmark/helpers.py | 14 +++----- 3 files changed, 35 insertions(+), 45 deletions(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 9db297b25b..9af7bc4d54 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -470,10 +470,11 @@ def main(): args = parse_args() config = parse_input_config(args.input_config) - _LOGGER.info("Original Model Path: {}".format(args.model_path)) - _LOGGER.info("Task: {}".format(args.task_name)) - _LOGGER.info("Batch Size: {}".format(args.batch_size)) - _LOGGER.info("Scenario: {}".format(args.scenario)) + _LOGGER.info("Original Model Path: %s" % args.model_path) + _LOGGER.info("Task: %s" % args.task_name) + _LOGGER.info("Batch Size: %d" % args.batch_size) + _LOGGER.info("Scenario: %s" % args.scenario) + _LOGGER.info("Requested Run Time(sec): %d" % args.time) batch_times, total_run_time, num_streams = benchmark_pipeline( model_path=args.model_path, @@ -516,30 +517,26 @@ def main(): # Export results export_path = args.export_path if export_path: - _LOGGER.info("Saving benchmark results to JSON file at {}".format(export_path)) + _LOGGER.info("Saving benchmark results to JSON file at %s" % export_path) with open(export_path, "w") as out: json.dump(export_dict, out, indent=2) # Results summary - print("Original Model Path: {}".format(args.model_path)) - print("Batch Size: {}".format(args.batch_size)) - print("Scenario: {}".format(args.scenario)) - print("Iterations: {}".format(int(benchmark_results["iterations"]))) - print("Total Runtime: {:.4f}".format(total_run_time)) - print("Throughput (items/sec): {:.4f}".format(benchmark_results["items_per_sec"])) + print("Original Model Path: %s" % args.model_path) + print("Batch Size: %d" % args.batch_size) + print("Scenario: %s" % args.scenario) + print("Iterations: %d" % int(benchmark_results["iterations"])) + print("Total Runtime: %.4f" % total_run_time) + print("Throughput (items/sec): %.4f" % benchmark_results["items_per_sec"]) print("Processing Time Breakdown: ") compute_sections = batch_times[0].stages for section in compute_sections: - print( - " {}: {:.2f}%".format( - section, section_stats[section]["total_percentage"] - ) - ) + print(" %s: 
%.2f" % (section, section_stats[section]["total_percentage"])) print("Mean Latency Breakdown (ms/batch): ") for section in compute_sections: - print(" {}: {:.4f}".format(section, section_stats[section]["mean"])) + print(" %s: %.4f" % (section, section_stats[section]["mean"])) if __name__ == "__main__": diff --git a/src/deepsparse/benchmark/data_creation.py b/src/deepsparse/benchmark/data_creation.py index 769cb73404..886d617310 100644 --- a/src/deepsparse/benchmark/data_creation.py +++ b/src/deepsparse/benchmark/data_creation.py @@ -101,7 +101,7 @@ def generate_image_data(config: Dict, batch_size: int) -> List[numpy.ndarray]: image_shape = config["input_image_shape"] else: image_shape = DEFAULT_IMAGE_SHAPE - _LOGGER.warning("Using default image shape {}".format(image_shape)) + _LOGGER.warning("Using default image shape %d" % image_shape) for _ in range(batch_size): rand_array = numpy.random.randint(0, high=255, size=image_shape).astype( @@ -121,16 +121,16 @@ def load_image_data(config: Dict, batch_size: int) -> List[str]: def generate_text_data(config: Dict, batch_size: int, avg_word_len=5) -> List[str]: - input_data = [] if "gen_sequence_length" in config: string_length = config["gen_sequence_length"] else: string_length = DEFAULT_STRING_LENGTH - _LOGGER.warning("Using default string length {}".format(string_length)) - for _ in range(batch_size): - rand_sentence = generate_sentence(string_length, avg_word_length=avg_word_len) - input_data.append(rand_sentence) + _LOGGER.warning("Using default string length %d" % string_length) + input_data = [ + generate_sentence(string_length, avg_word_length=avg_word_len) + for _ in range(batch_size) + ] return input_data @@ -144,13 +144,12 @@ def load_text_data(config: Dict, batch_size: int) -> List[str]: max_string_length = config["max_string_length"] else: max_string_length = -1 - _LOGGER.warning("Using default max string length {}".format(max_string_length)) + _LOGGER.warning("Using default max string length %d" % max_string_length) input_data = [] for f_path in input_files: - f = open(f_path) - text_data = f.read() - f.close() - input_data.append(text_data[:max_string_length]) + with open(f_path) as f: + text_data = f.read() + input_data.append(text_data[:max_string_length]) return input_data @@ -159,7 +158,7 @@ def generate_question_data(config: Dict, avg_word_len=5) -> Tuple[str, str]: string_length = config["gen_sequence_length"] else: string_length = DEFAULT_STRING_LENGTH - _LOGGER.warning("Using default string length {}".format(string_length)) + _LOGGER.warning("Using default string length %d" % string_length) question = generate_sentence(string_length, avg_word_length=avg_word_len) context = generate_sentence(string_length, avg_word_length=avg_word_len) return (question, context) @@ -169,10 +168,10 @@ def load_question_data(config: Dict) -> Tuple[str, str]: path_to_questions = config["question_file"] path_to_context = config["context_file"] - f_question = open(path_to_questions) - f_context = open(path_to_context) - question = f_question.read() - context = f_context.read() - f_question.close() - f_context.close() + question = "" + context = "" + with open(path_to_questions) as f: + question = f.read() + with open(path_to_context) as f: + context = f.read() return question, context diff --git a/src/deepsparse/benchmark/helpers.py b/src/deepsparse/benchmark/helpers.py index 675226710b..301e834480 100644 --- a/src/deepsparse/benchmark/helpers.py +++ b/src/deepsparse/benchmark/helpers.py @@ -49,9 +49,7 @@ def decide_thread_pinning(pinning_mode: 
str) -> None: _LOGGER.info("Thread pinning disabled, performance may be sub-optimal") else: _LOGGER.info( - "Recieved invalid option for thread_pinning '{}', skipping".format( - pinning_mode - ) + "Recieved invalid option for thread_pinning '%s', skipping" % pinning_mode ) @@ -77,9 +75,7 @@ def parse_scenario(scenario: str) -> str: return "elastic" else: _LOGGER.info( - "Recieved invalid option for scenario'{}', defaulting to async".format( - scenario - ) + "Recieved invalid option for scenario'%s', defaulting to async" % scenario ) return "multistream" @@ -98,10 +94,8 @@ def parse_num_streams(num_streams: int, num_cores: int, scenario: str): else: default_num_streams = max(1, int(num_cores / 2)) _LOGGER.info( - "num_streams default value chosen of {}. " - "This requires tuning and may be sub-optimal".format( - default_num_streams - ) + "num_streams default value chosen of %d. " + "This requires tuning and may be sub-optimal" % default_num_streams ) return default_num_streams From 99b4051efe5896590eb971e24a9bcd24ff341958 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Tue, 1 Aug 2023 16:59:54 -0400 Subject: [PATCH 26/37] BaseModel for pipeline config --- .../benchmark/benchmark_pipeline.py | 48 +++++------ src/deepsparse/benchmark/config.py | 85 +++++++++++++++++++ src/deepsparse/benchmark/data_creation.py | 62 +++++++++----- src/deepsparse/benchmark/helpers.py | 8 +- 4 files changed, 155 insertions(+), 48 deletions(-) create mode 100644 src/deepsparse/benchmark/config.py diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 9af7bc4d54..748f7b2545 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -90,8 +90,10 @@ from typing import Dict, List, Tuple import numpy +from pydantic import BaseModel from deepsparse import Pipeline, __version__ +from deepsparse.benchmark.config import PipelineBenchmarkConfig, PipelineInputType from deepsparse.benchmark.data_creation import ( SchemaType, generate_image_data, @@ -123,11 +125,6 @@ ORT_ENGINE = "onnxruntime" -class PipelineInputType: - DUMMY: str = "dummy" - REAL: str = "real" - - def parse_args(): parser = argparse.ArgumentParser(description="Benchmark DeepSparse Pipelines") parser.add_argument("task_name", type=str, help="Type of pipeline to run") @@ -313,12 +310,13 @@ def multistream_benchmark( def create_input_schema( - pipeline: Pipeline, input_type: PipelineInputType, batch_size: int, config: Dict -) -> any: + pipeline: Pipeline, + input_type: PipelineInputType, + batch_size: int, + config: PipelineBenchmarkConfig, +) -> BaseModel: input_schema_requirement = get_input_schema_type(pipeline) - kwargs = {} - if "input_schema_kwargs" in config: - kwargs = config["input_schema_kwargs"] + kwargs = config.input_schema_kwargs if input_type == PipelineInputType.DUMMY: if input_schema_requirement == SchemaType.IMAGE: @@ -331,9 +329,10 @@ def create_input_schema( input_data = generate_text_data(config, batch_size) inputs = pipeline.input_schema(inputs=input_data, **kwargs) elif input_schema_requirement == SchemaType.QUESTION: - _LOGGER.warn( - "Only batch size of 1 supported for Question Answering Pipeline" - ) + if batch_size != 1: + _LOGGER.warning( + "Only batch size of 1 supported for Question Answering Pipeline" + ) question, context = generate_question_data(config) inputs = pipeline.input_schema(question=question, context=context, **kwargs) elif input_type == PipelineInputType.REAL: @@ -341,15 +340,16 @@ def 
create_input_schema( input_data = load_image_data(config, batch_size) inputs = pipeline.input_schema(images=input_data, **kwargs) elif input_schema_requirement == SchemaType.TEXT_SEQ: - input_data = load_text_data(config) + input_data = load_text_data(config, batch_size) inputs = pipeline.input_schema(sequences=input_data, **kwargs) elif input_schema_requirement == SchemaType.TEXT_INPUT: input_data = load_text_data(config, batch_size) inputs = pipeline.input_schema(inputs=input_data, **kwargs) elif input_schema_requirement == SchemaType.QUESTION: - _LOGGER.warn( - "Only batch size of 1 supported for Question Answering Pipeline" - ) + if batch_size != 1: + _LOGGER.warning( + "Only batch size of 1 supported for Question Answering Pipeline" + ) question, context = load_question_data(config) inputs = pipeline.input_schema(question=question, context=context, **kwargs) else: @@ -361,7 +361,7 @@ def create_input_schema( def benchmark_pipeline( model_path: str, task: str, - config: Dict, + config: PipelineBenchmarkConfig, batch_size: int = 1, num_cores: int = None, scenario: str = "sync", @@ -384,12 +384,8 @@ def benchmark_pipeline( scheduler = parse_scheduler(scenario) num_streams = parse_num_streams(num_streams, num_cores, scenario) - if "data_type" not in config: - raise Exception("Data type(dummy or real) must be specified in config") - input_type = config["data_type"] - kwargs = {} - if "pipeline_kwargs" in config: - kwargs = config["pipeline_kwargs"] + input_type = config.data_type + kwargs = config.pipeline_kwargs pipeline = Pipeline.create( task=task, model_path=model_path, @@ -510,7 +506,7 @@ def main(): "scenario": args.scenario, "seconds_to_run": time, "num_streams": args.num_streams, - "input_config": config, + "input_config": dict(config), "benchmark_results": benchmark_results, } @@ -532,7 +528,7 @@ def main(): print("Processing Time Breakdown: ") compute_sections = batch_times[0].stages for section in compute_sections: - print(" %s: %.2f" % (section, section_stats[section]["total_percentage"])) + print(" %s: %.2f%%" % (section, section_stats[section]["total_percentage"])) print("Mean Latency Breakdown (ms/batch): ") for section in compute_sections: diff --git a/src/deepsparse/benchmark/config.py b/src/deepsparse/benchmark/config.py new file mode 100644 index 0000000000..6829116758 --- /dev/null +++ b/src/deepsparse/benchmark/config.py @@ -0,0 +1,85 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, List, Optional + +from pydantic import BaseModel, Field + + +__all__ = ["PipelineInputType", "PipelineBenchmarkConfig"] + + +class PipelineInputType: + DUMMY: str = "dummy" + REAL: str = "real" + + +class PipelineBenchmarkConfig(BaseModel): + data_type: str = Field( + default=PipelineInputType.DUMMY, + description=( + "Type of data source, dummy to generate data or real to load from file." 
+ ), + ) + + gen_sequence_length: Optional[int] = Field( + default=None, + description=( + "Number of characters to generate for pipelines that take text input." + ), + ) + + input_image_shape: Optional[List[int]] = Field( + default=None, + description=( + "Image size for pipelines that take image input, 3-dim with channel as the " + "last dimmension" + ), + ) + + data_folder: Optional[str] = Field( + default=None, + description=( + "Path to local folder of input data containing text or image files" + ), + ) + + recursive_search: bool = Field( + default=False, + description=("whether to recursively search through data_folder for files"), + ) + + max_string_length: int = Field( + default=-1, + description=( + "Maximum characters to read from each text file, -1 for no maximum" + ), + ) + + question_file: Optional[str] = Field( + default=None, description=("Path to text file to read question from") + ) + + context_file: Optional[str] = Field( + default=None, description=("Path to text file to read question context from") + ) + + pipeline_kwargs: Dict = Field( + default={}, description=("Additional arguments passed to pipeline creation") + ) + + input_schema_kwargs: Dict = Field( + default={}, + description=("Additional arguments passed to input schema creations "), + ) diff --git a/src/deepsparse/benchmark/data_creation.py b/src/deepsparse/benchmark/data_creation.py index 886d617310..8407c8cf0f 100644 --- a/src/deepsparse/benchmark/data_creation.py +++ b/src/deepsparse/benchmark/data_creation.py @@ -16,11 +16,13 @@ import logging import random import string +from os import path from typing import Dict, List, Tuple import numpy from deepsparse import Pipeline +from deepsparse.benchmark.config import PipelineBenchmarkConfig _LOGGER = logging.getLogger(__name__) @@ -75,6 +77,8 @@ def get_input_schema_type(pipeline: Pipeline) -> str: def get_files_with_endings( folder: str, num_files: int, recursive: bool, file_endings: Tuple[str] ) -> List[str]: + if not path.exists(folder): + raise Exception("Can't parse files, {} does not exist".format(folder)) files = [] for f in glob.glob(folder + "/**", recursive=recursive): if f.lower().endswith(file_endings): @@ -95,13 +99,17 @@ def generate_sentence(string_length: int, avg_word_length: int = 5): return "".join(random_chars) -def generate_image_data(config: Dict, batch_size: int) -> List[numpy.ndarray]: +def generate_image_data( + config: PipelineBenchmarkConfig, batch_size: int +) -> List[numpy.ndarray]: input_data = [] - if "input_image_shape" in config and len(config["input_image_shape"]) == 3: - image_shape = config["input_image_shape"] + if config.input_image_shape and len(config.input_image_shape) == 3: + image_shape = config.input_image_shape else: image_shape = DEFAULT_IMAGE_SHAPE - _LOGGER.warning("Using default image shape %d" % image_shape) + _LOGGER.warning( + f"Could not parse {config.input_image_shape}, Using default image shape {image_shape}" + ) for _ in range(batch_size): rand_array = numpy.random.randint(0, high=255, size=image_shape).astype( @@ -112,20 +120,24 @@ def generate_image_data(config: Dict, batch_size: int) -> List[numpy.ndarray]: return input_data -def load_image_data(config: Dict, batch_size: int) -> List[str]: - path_to_data = config["data_folder"] - recursive_search = config["recursive_search"] +def load_image_data(config: PipelineBenchmarkConfig, batch_size: int) -> List[str]: + if not config.data_folder: + raise Exception("Data folder must be defined for real inputs") + path_to_data = config.data_folder + 
recursive_search = config.recursive_search return get_files_with_endings( path_to_data, batch_size, recursive_search, (".jpg", ".jpeg", ".gif") ) -def generate_text_data(config: Dict, batch_size: int, avg_word_len=5) -> List[str]: - if "gen_sequence_length" in config: - string_length = config["gen_sequence_length"] +def generate_text_data( + config: PipelineBenchmarkConfig, batch_size: int, avg_word_len=5 +) -> List[str]: + if config.gen_sequence_length: + string_length = config.gen_sequence_length else: string_length = DEFAULT_STRING_LENGTH - _LOGGER.warning("Using default string length %d" % string_length) + _LOGGER.warning("Ssing default string length %d" % string_length) input_data = [ generate_sentence(string_length, avg_word_length=avg_word_len) @@ -134,14 +146,16 @@ def generate_text_data(config: Dict, batch_size: int, avg_word_len=5) -> List[st return input_data -def load_text_data(config: Dict, batch_size: int) -> List[str]: - path_to_data = config["data_folder"] - recursive_search = config["recursive_search"] +def load_text_data(config: PipelineBenchmarkConfig, batch_size: int) -> List[str]: + if not config.data_folder: + raise Exception("Data folder must be defined for real inputs") + path_to_data = config.data_folder + recursive_search = config.recursive_search input_files = get_files_with_endings( path_to_data, batch_size, recursive_search, (".txt") ) - if "max_string_length" in config: - max_string_length = config["max_string_length"] + if config.max_string_length: + max_string_length = config.max_string_length else: max_string_length = -1 _LOGGER.warning("Using default max string length %d" % max_string_length) @@ -153,9 +167,11 @@ def load_text_data(config: Dict, batch_size: int) -> List[str]: return input_data -def generate_question_data(config: Dict, avg_word_len=5) -> Tuple[str, str]: - if "gen_sequence_length" in config: - string_length = config["gen_sequence_length"] +def generate_question_data( + config: PipelineBenchmarkConfig, avg_word_len=5 +) -> Tuple[str, str]: + if config.gen_sequence_length: + string_length = config.gen_sequence_length else: string_length = DEFAULT_STRING_LENGTH _LOGGER.warning("Using default string length %d" % string_length) @@ -165,8 +181,12 @@ def generate_question_data(config: Dict, avg_word_len=5) -> Tuple[str, str]: def load_question_data(config: Dict) -> Tuple[str, str]: - path_to_questions = config["question_file"] - path_to_context = config["context_file"] + if not config.question_file or not config.context_file: + raise Exception( + "Question and context files must be defined for question_answering pieline" + ) + path_to_questions = config.question_file + path_to_context = config.context_file question = "" context = "" diff --git a/src/deepsparse/benchmark/helpers.py b/src/deepsparse/benchmark/helpers.py index 301e834480..ef6b6c92a5 100644 --- a/src/deepsparse/benchmark/helpers.py +++ b/src/deepsparse/benchmark/helpers.py @@ -17,7 +17,10 @@ import os from typing import Dict +from pydantic import ValidationError + from deepsparse import Scheduler +from deepsparse.benchmark.config import PipelineBenchmarkConfig _LOGGER = logging.getLogger(__name__) @@ -104,4 +107,7 @@ def parse_input_config(input_config_file: str) -> Dict[str, any]: config_file = open(input_config_file) config = json.load(config_file) config_file.close() - return config + try: + return PipelineBenchmarkConfig(**config) + except ValidationError as e: + _LOGGER.error(e) From 67d8187768d13eaf1212b7b71677baebfa458907 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Tue, 1 
Aug 2023 17:00:30 -0400 Subject: [PATCH 27/37] quality fix --- src/deepsparse/benchmark/data_creation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/deepsparse/benchmark/data_creation.py b/src/deepsparse/benchmark/data_creation.py index 8407c8cf0f..dcdbc9d7a5 100644 --- a/src/deepsparse/benchmark/data_creation.py +++ b/src/deepsparse/benchmark/data_creation.py @@ -108,7 +108,8 @@ def generate_image_data( else: image_shape = DEFAULT_IMAGE_SHAPE _LOGGER.warning( - f"Could not parse {config.input_image_shape}, Using default image shape {image_shape}" + f"Could not parse {config.input_image_shape}, " + "Using default image shape {image_shape}" ) for _ in range(batch_size): From 8b9768e06f78f8fd87d268e31cd9ca06eb1b6721 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Wed, 2 Aug 2023 10:12:32 -0400 Subject: [PATCH 28/37] fix broken test --- tests/test_pipeline_benchmark.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/test_pipeline_benchmark.py b/tests/test_pipeline_benchmark.py index 698e50c927..68a1dcc3b3 100644 --- a/tests/test_pipeline_benchmark.py +++ b/tests/test_pipeline_benchmark.py @@ -20,6 +20,7 @@ import pytest from deepsparse import Pipeline from deepsparse.benchmark.benchmark_pipeline import calculate_section_stats +from deepsparse.benchmark.config import PipelineBenchmarkConfig from deepsparse.benchmark.data_creation import ( SchemaType, generate_image_data, @@ -100,7 +101,8 @@ def test_pipeline_benchmark( def test_generate_image_data(): batch_size = 32 - config = {"input_image_shape": (600, 600, 1)} + config_args = {"input_image_shape": (600, 600, 1)} + config = PipelineBenchmarkConfig(**config_args) image_data = generate_image_data(config, batch_size) assert len(image_data) == batch_size img = image_data[0] @@ -112,7 +114,8 @@ def test_generate_image_data(): def test_generate_text_data(): batch_size = 16 avg_word_len = 8 - config = {"gen_sequence_length": 250} + config_args = {"gen_sequence_length": 250} + config = PipelineBenchmarkConfig(**config_args) text_data = generate_text_data(config, batch_size, avg_word_len=avg_word_len) assert len(text_data) == batch_size text = text_data[0] @@ -123,10 +126,11 @@ def test_generate_text_data(): def test_generate_question_data(): avg_word_len = 10 - config = {"gen_sequence_length": 50} + config_args = {"gen_sequence_length": 50} + config = PipelineBenchmarkConfig(**config_args) question, context = generate_question_data(config, avg_word_len=avg_word_len) - assert len(question) == config["gen_sequence_length"] - assert len(context) == config["gen_sequence_length"] + assert len(question) == config.gen_sequence_length + assert len(context) == config.gen_sequence_length num_q_spaces = question.count(" ") num_c_spaces = context.count(" ") assert num_q_spaces == num_c_spaces == int(len(question) / avg_word_len) From 50d5a74dc219d957786ca85521ed304a11d5b01c Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Wed, 2 Aug 2023 16:43:32 -0400 Subject: [PATCH 29/37] cleanup code, replace argpase with click --- .../benchmark/benchmark_pipeline.py | 309 +++++++++--------- src/deepsparse/benchmark/data_creation.py | 14 +- tests/test_pipeline_benchmark.py | 3 +- 3 files changed, 162 insertions(+), 164 deletions(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 748f7b2545..6a445f5764 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -81,7 +81,6 @@ -c 
config.json -t 30 -s async """ -import argparse import json import logging import queue @@ -89,6 +88,7 @@ import time from typing import Dict, List, Tuple +import click import numpy from pydantic import BaseModel @@ -125,121 +125,6 @@ ORT_ENGINE = "onnxruntime" -def parse_args(): - parser = argparse.ArgumentParser(description="Benchmark DeepSparse Pipelines") - parser.add_argument("task_name", type=str, help="Type of pipeline to run") - parser.add_argument( - "model_path", - type=str, - help="Path to an ONNX model file or SparseZoo model stub", - ) - parser.add_argument( - "-c", - "--input_config", - type=str, - default="config.json", - help="JSON file containing schema for input data", - ) - parser.add_argument( - "-b", - "--batch_size", - type=int, - default=1, - help="The batch size to run the analysis for. Must be greater than 0", - ) - parser.add_argument( - "-ncores", - "--num_cores", - type=int, - default=cpu_architecture().num_available_physical_cores, - help=( - "The number of physical cores to run the analysis on, " - "defaults to all physical cores available on the system" - ), - ) - parser.add_argument( - "-s", - "--scenario", - type=str, - default="sync", - choices=["async", "sync", "elastic"], - help=( - "Choose between using the async, sync and elastic scenarios. Sync and " - "async are similar to the single-stream/multi-stream scenarios. Elastic " - "is a newer scenario that behaves similarly to the async scenario " - "but uses a different scheduling backend. Default value is sync." - ), - ) - parser.add_argument( - "-t", - "--time", - type=int, - default=10, - help="The number of seconds the benchmark will run. Default is 10 seconds.", - ) - parser.add_argument( - "-w", - "--warmup_time", - type=int, - default=2, - help=( - "The number of seconds the benchmark will warmup before running." - "Default is 2 seconds." - ), - ) - parser.add_argument( - "-nstreams", - "--num_streams", - type=int, - default=None, - help=( - "The number of streams that will submit inferences in parallel using " - "async scenario. Default is automatically determined for given hardware " - "and may be sub-optimal." - ), - ) - parser.add_argument( - "-pin", - "--thread_pinning", - type=str, - default="core", - choices=["none", "core", "numa"], - help=( - "Enable binding threads to cores ('core' the default), " - "threads to cores on sockets ('numa'), or disable ('none')" - ), - ) - parser.add_argument( - "-e", - "--engine", - type=str, - default=DEEPSPARSE_ENGINE, - help=( - "Inference engine backend to run eval on. Choices are 'deepsparse', " - "'onnxruntime'. Default is 'deepsparse'. Can also specify a user " - "defined engine class by giving the script and class name in the " - "following format :. 
This " - "engine class will be dynamically imported during runtime" - ), - ) - parser.add_argument( - "-q", - "--quiet", - help="Lower logging verbosity", - action="store_true", - default=False, - ) - parser.add_argument( - "-x", - "--export_path", - help="Store results into a JSON file", - type=str, - default=None, - ) - - return parser.parse_args() - - class PipelineExecutorThread(threading.Thread): """ Run pipeline reoeatedly on inputs for max_time seconds, pushing the timer data to @@ -329,11 +214,7 @@ def create_input_schema( input_data = generate_text_data(config, batch_size) inputs = pipeline.input_schema(inputs=input_data, **kwargs) elif input_schema_requirement == SchemaType.QUESTION: - if batch_size != 1: - _LOGGER.warning( - "Only batch size of 1 supported for Question Answering Pipeline" - ) - question, context = generate_question_data(config) + question, context = generate_question_data(config, batch_size) inputs = pipeline.input_schema(question=question, context=context, **kwargs) elif input_type == PipelineInputType.REAL: if input_schema_requirement == SchemaType.IMAGE: @@ -346,11 +227,7 @@ def create_input_schema( input_data = load_text_data(config, batch_size) inputs = pipeline.input_schema(inputs=input_data, **kwargs) elif input_schema_requirement == SchemaType.QUESTION: - if batch_size != 1: - _LOGGER.warning( - "Only batch size of 1 supported for Question Answering Pipeline" - ) - question, context = load_question_data(config) + question, context = load_question_data(config, batch_size) inputs = pipeline.input_schema(question=question, context=context, **kwargs) else: raise Exception(f"Unknown input type '{input_type}'") @@ -462,33 +339,149 @@ def calculate_section_stats( return sections -def main(): - args = parse_args() - config = parse_input_config(args.input_config) - - _LOGGER.info("Original Model Path: %s" % args.model_path) - _LOGGER.info("Task: %s" % args.task_name) - _LOGGER.info("Batch Size: %d" % args.batch_size) - _LOGGER.info("Scenario: %s" % args.scenario) - _LOGGER.info("Requested Run Time(sec): %d" % args.time) +@click.command() +@click.argument("task_name", type=str) +@click.argument("model_path", type=str) +@click.option( + "-c", + "--input_config", + type=str, + default="config.json", + help="JSON file containing schema for input data", +) +@click.option( + "-b", + "--batch_size", + type=int, + default=1, + help="The batch size to run the analysis for. Must be greater than 0", +) +@click.option( + "-ncores", + "--num_cores", + type=int, + default=cpu_architecture().num_available_physical_cores, + help=( + "The number of physical cores to run the analysis on, " + "defaults to all physical cores available on the system" + ), +) +@click.option( + "-s", + "--scenario", + type=str, + default="sync", + help=( + "Choose between using the async, sync and elastic scenarios. Sync and " + "async are similar to the single-stream/multi-stream scenarios. Elastic " + "is a newer scenario that behaves similarly to the async scenario " + "but uses a different scheduling backend. Default value is sync." + ), +) +@click.option( + "-t", + "--run_time", + type=int, + default=10, + help="The number of seconds the benchmark will run. Default is 10 seconds.", +) +@click.option( + "-w", + "--warmup_time", + type=int, + default=2, + help=( + "The number of seconds the benchmark will warmup before running." + "Default is 2 seconds." 
+ ), +) +@click.option( + "-nstreams", + "--num_streams", + type=int, + default=None, + help=( + "The number of streams that will submit inferences in parallel using " + "async scenario. Default is automatically determined for given hardware " + "and may be sub-optimal." + ), +) +@click.option( + "-pin", + "--thread_pinning", + type=str, + default="core", + help=( + "Enable binding threads to cores ('core' the default), " + "threads to cores on sockets ('numa'), or disable ('none')" + ), +) +@click.option( + "-e", + "--engine", + type=str, + default=DEEPSPARSE_ENGINE, + help=( + "Inference engine backend to run eval on. Choices are 'deepsparse', " + "'onnxruntime'. Default is 'deepsparse'. Can also specify a user " + "defined engine class by giving the script and class name in the " + "following format :. This " + "engine class will be dynamically imported during runtime" + ), +) +@click.option( + "-q", + "--quiet", + help="Lower logging verbosity", + default=False, +) +@click.option( + "-x", + "--export_path", + help="Store results into a JSON file", + type=str, + default=None, +) +def main( + task_name: str, + model_path: str, + input_config: str, + batch_size: int, + num_cores: int, + scenario: str, + run_time: int, + warmup_time: int, + num_streams: int, + thread_pinning: str, + engine: str, + quiet: bool, + export_path: str, +): + config = parse_input_config(input_config) + + _LOGGER.info("Original Model Path: %s" % model_path) + _LOGGER.info("Task: %s" % task_name) + _LOGGER.info("Batch Size: %d" % batch_size) + _LOGGER.info("Scenario: %s" % scenario) + _LOGGER.info("Requested Run Time(sec): %d" % run_time) batch_times, total_run_time, num_streams = benchmark_pipeline( - model_path=args.model_path, - task=args.task_name, + model_path=model_path, + task=task_name, config=config, - batch_size=args.batch_size, - num_cores=args.num_cores, - scenario=args.scenario, - seconds_to_run=args.time, - warmup_time=args.warmup_time, - num_streams=args.num_streams, - thread_pinning=args.thread_pinning, - engine=args.engine, - quiet=args.quiet, + batch_size=batch_size, + num_cores=num_cores, + scenario=scenario, + seconds_to_run=run_time, + warmup_time=warmup_time, + num_streams=num_streams, + thread_pinning=thread_pinning, + engine=engine, + quiet=quiet, ) section_stats = calculate_section_stats(batch_times, total_run_time, num_streams) - items_per_sec = (len(batch_times) * args.batch_size) / total_run_time + items_per_sec = (len(batch_times) * batch_size) / total_run_time benchmark_results = { "items_per_sec": items_per_sec, @@ -498,29 +491,29 @@ def main(): } export_dict = { - "engine": args.engine, + "engine": engine, "version": __version__, - "model_path": args.model_path, - "batch_size": args.batch_size, - "num_cores": args.num_cores, - "scenario": args.scenario, - "seconds_to_run": time, - "num_streams": args.num_streams, + "model_path": model_path, + "batch_size": batch_size, + "num_cores": num_cores, + "scenario": scenario, + "seconds_to_run": run_time, + "num_streams": num_streams, "input_config": dict(config), "benchmark_results": benchmark_results, } # Export results - export_path = args.export_path + export_path = export_path if export_path: _LOGGER.info("Saving benchmark results to JSON file at %s" % export_path) with open(export_path, "w") as out: json.dump(export_dict, out, indent=2) # Results summary - print("Original Model Path: %s" % args.model_path) - print("Batch Size: %d" % args.batch_size) - print("Scenario: %s" % args.scenario) + print("Original Model Path: %s" % model_path) 
+ print("Batch Size: %d" % batch_size) + print("Scenario: %s" % scenario) print("Iterations: %d" % int(benchmark_results["iterations"])) print("Total Runtime: %.4f" % total_run_time) print("Throughput (items/sec): %.4f" % benchmark_results["items_per_sec"]) diff --git a/src/deepsparse/benchmark/data_creation.py b/src/deepsparse/benchmark/data_creation.py index dcdbc9d7a5..0f0849280f 100644 --- a/src/deepsparse/benchmark/data_creation.py +++ b/src/deepsparse/benchmark/data_creation.py @@ -32,8 +32,6 @@ __all__ = [ "get_input_schema_type", - "get_files_with_endings", - "generate_sentence", "generate_image_data", "load_image_data", "generate_text_data", @@ -169,8 +167,12 @@ def load_text_data(config: PipelineBenchmarkConfig, batch_size: int) -> List[str def generate_question_data( - config: PipelineBenchmarkConfig, avg_word_len=5 + config: PipelineBenchmarkConfig, batch_size: int, avg_word_len=5 ) -> Tuple[str, str]: + if batch_size != 1: + _LOGGER.warning( + "Only batch size of 1 supported for Question Answering Pipeline" + ) if config.gen_sequence_length: string_length = config.gen_sequence_length else: @@ -181,7 +183,11 @@ def generate_question_data( return (question, context) -def load_question_data(config: Dict) -> Tuple[str, str]: +def load_question_data(config: Dict, batch_size: int) -> Tuple[str, str]: + if batch_size != 1: + _LOGGER.warning( + "Only batch size of 1 supported for Question Answering Pipeline" + ) if not config.question_file or not config.context_file: raise Exception( "Question and context files must be defined for question_answering pieline" diff --git a/tests/test_pipeline_benchmark.py b/tests/test_pipeline_benchmark.py index 68a1dcc3b3..64cf717765 100644 --- a/tests/test_pipeline_benchmark.py +++ b/tests/test_pipeline_benchmark.py @@ -128,7 +128,7 @@ def test_generate_question_data(): avg_word_len = 10 config_args = {"gen_sequence_length": 50} config = PipelineBenchmarkConfig(**config_args) - question, context = generate_question_data(config, avg_word_len=avg_word_len) + question, context = generate_question_data(config, 1, avg_word_len=avg_word_len) assert len(question) == config.gen_sequence_length assert len(context) == config.gen_sequence_length num_q_spaces = question.count(" ") @@ -145,7 +145,6 @@ def test_generate_question_data(): ("question_answering", SchemaType.QUESTION), ], ) -@pytest.mark.skip(reason="High memory usage, causes GitHub test run to be killed") def test_get_input_schema_type(task_name, input_schema): pipeline = Pipeline.create(task=task_name) assert get_input_schema_type(pipeline) == input_schema From 70f74406dbebf37eb36c4126b1c414aced4b4674 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Fri, 4 Aug 2023 17:53:42 -0400 Subject: [PATCH 30/37] Update README with example output --- src/deepsparse/benchmark/README.md | 34 ++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/deepsparse/benchmark/README.md b/src/deepsparse/benchmark/README.md index 7912abe18c..0088207347 100644 --- a/src/deepsparse/benchmark/README.md +++ b/src/deepsparse/benchmark/README.md @@ -266,4 +266,38 @@ deepsparse.benchmark_pipeline image_classification zoo:cv/classification/resnet_ Running CodeGen text generation for 30 seconds asynchronously ``` deepsparse.benchmark_pipeline text_generation zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/pruned50-none -c config.json -t 30 -s async +``` +### Example Output +Command: +``` +deepsparse.benchmark_pipeline text_classification 
zoo:nlp/sentiment_analysis/distilbert-none/pytorch/huggingface/sst2/pruned90-none -c config.json +``` +config.json: +```json +{ + "data_type": "real", + "gen_sequence_length": 1000, + "data_folder": "/home/sadkins/text_data/", + "recursive_search": true, + "max_string_length": -1 +} +``` + +Output: +``` +Batch Size: 1 +Scenario: sync +Iterations: 955 +Total Runtime: 10.0090 +Throughput (items/sec): 95.4137 +Processing Time Breakdown: + total_inference: 99.49% + pre_process: 25.70% + engine_forward: 72.56% + post_process: 1.03% +Mean Latency Breakdown (ms/batch): + total_inference: 10.4274 + pre_process: 2.6938 + engine_forward: 7.6051 + post_process: 0.1077 ``` \ No newline at end of file From 3afeec744b3f8993c9361ba9458180bbf7c2c004 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Wed, 9 Aug 2023 16:02:24 -0400 Subject: [PATCH 31/37] support for multiple timers, adding docstrings --- .../benchmark/benchmark_pipeline.py | 46 +++++++++++++++---- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 6a445f5764..2dbb4898a0 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -149,7 +149,8 @@ def __init__( def run(self): while time.perf_counter() < self._max_time: _ = self._pipeline(self._inputs) - self._time_queue.put(self._pipeline.timer_manager.latest) + for timer in self._pipeline.timer_manager.timers: + self._time_queue.put(timer) def singlestream_benchmark( @@ -163,7 +164,8 @@ def singlestream_benchmark( batch_timings = [] while time.perf_counter() < benchmark_end_time: _ = pipeline(inputs) - batch_timings.append(pipeline.timer_manager.latest) + for timer in pipeline.timer_manager.timers: + batch_timings.append(timer) return batch_timings @@ -249,6 +251,25 @@ def benchmark_pipeline( engine: str = DEEPSPARSE_ENGINE, quiet: bool = False, ) -> Tuple[List[StagedTimer], float]: + """ + Run a benchmark over the specified pipeline, tracking timings for pre-processing, + forward pass and post-processing. Results are printed to the console and optionally + exported to a json file. 
+ + :param model_path: path to onnx model + :param task: name of pipeline to run + :param config: configuration for pipeline inputs + :param batch_size: number of inputs to process each forward pass + :param num_cores: number of physical cores to run on + :param scenario: sync, async or elastic processing + :param seconds_to_run: number of seconds to run benchmark for + :param warmup_time: length to run pipeline before beginning benchmark + :param num_streams: number of parallel streams during async scenario + :param thread_pinning: enable binding threads to cores + :param engine: inference engine, deepsparse or onnxruntime + :param quiet: lower logging verbosity + :return: list of StagedTimer objects for each forward pass and the total run time + """ if quiet: set_logging_level(logging.WARN) @@ -326,14 +347,20 @@ def calculate_statistics( def calculate_section_stats( batch_times: List[StagedTimer], total_run_time: float, num_streams: int ) -> Dict[str, Dict]: - compute_sections = batch_times[0].stages total_run_time_ms = total_run_time * 1000 + section_times = {} + for timer in batch_times: + for section in timer.stages: + if section not in section_times: + section_times[section] = [] + section_times[section].append(timer.times[section] * 1000) + sections = {} - for section in compute_sections: - section_times = [st.times[section] * 1000 for st in batch_times] - sections[section] = calculate_statistics( - section_times, total_run_time_ms, num_streams + for section_name in section_times: + times = section_times[section_name] + sections[section_name] = calculate_statistics( + times, total_run_time_ms, num_streams ) return sections @@ -519,12 +546,11 @@ def main( print("Throughput (items/sec): %.4f" % benchmark_results["items_per_sec"]) print("Processing Time Breakdown: ") - compute_sections = batch_times[0].stages - for section in compute_sections: + for section in section_stats: print(" %s: %.2f%%" % (section, section_stats[section]["total_percentage"])) print("Mean Latency Breakdown (ms/batch): ") - for section in compute_sections: + for section in section_stats: print(" %s: %.4f" % (section, section_stats[section]["mean"])) From df9a3f7bd752c48106ac5dff2ab22085e819fe86 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Wed, 9 Aug 2023 16:05:05 -0400 Subject: [PATCH 32/37] docstrings --- src/deepsparse/benchmark/benchmark_pipeline.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 2dbb4898a0..e99c513bf1 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -159,6 +159,11 @@ def singlestream_benchmark( """ Run pipeline repeatedly on inputs for max_time seconds, storing the runtime of each section of the pipeline in batch_timings + + :param pipeline: pipeline to execute + :param inputs: inputs to pass through pipeline + :param seconds_to_run: how long to run pipeline for + :return: list of timings for each forward pass """ benchmark_end_time = time.perf_counter() + seconds_to_run batch_timings = [] From b0bc84066cc6148bf099cead201f72defa1c0c22 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Thu, 10 Aug 2023 11:14:56 -0400 Subject: [PATCH 33/37] add text generation example to README --- src/deepsparse/benchmark/README.md | 41 ++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/deepsparse/benchmark/README.md b/src/deepsparse/benchmark/README.md index 0088207347..5c23ff5c14 100644 --- 
a/src/deepsparse/benchmark/README.md +++ b/src/deepsparse/benchmark/README.md @@ -300,4 +300,45 @@ Mean Latency Breakdown (ms/batch): pre_process: 2.6938 engine_forward: 7.6051 post_process: 0.1077 +``` + +Command: +``` +deepsparse.benchmark_pipeline text_generation zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base_quant-none -c config.json -t 60 +``` +config.json: +```json +{ + "data_type": "dummy", + "gen_sequence_length": 100, + "pipeline_kwargs": {}, + "input_schema_kwargs": {} +} +``` + +Output: +``` +Batch Size: 1 +Scenario: sync +Iterations: 6 +Total Runtime: 62.8005 +Throughput (items/sec): 0.0955 +Processing Time Breakdown: + total_inference: 100.00% + pre_process: 0.00% + engine_forward: 99.98% + post_process: 0.01% + engine_prompt_prefill: 5.83% + engine_prompt_prefill_single: 0.09% + engine_token_generation: 93.64% + engine_token_generation_single: 0.09% +Mean Latency Breakdown (ms/batch): + total_inference: 20932.4786 + pre_process: 0.9729 + engine_forward: 20930.2190 + post_process: 1.2150 + engine_prompt_prefill: 1220.7037 + engine_prompt_prefill_single: 19.0412 + engine_token_generation: 19603.0353 + engine_token_generation_single: 19.1170 ``` \ No newline at end of file From eba70d6d56dbc00e4b42c841e520c75000194889 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Thu, 10 Aug 2023 14:50:00 -0400 Subject: [PATCH 34/37] clean up timermanager usage --- .../benchmark/benchmark_pipeline.py | 68 +++++++------------ src/deepsparse/utils/timer.py | 4 ++ tests/test_pipeline_benchmark.py | 7 +- 3 files changed, 35 insertions(+), 44 deletions(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index e99c513bf1..697d24b947 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -83,7 +83,6 @@ import json import logging -import queue import threading import time from typing import Dict, List, Tuple @@ -113,7 +112,6 @@ ) from deepsparse.cpu import cpu_architecture from deepsparse.log import set_logging_level -from deepsparse.utils.timer import StagedTimer __all__ = ["benchmark_pipeline"] @@ -127,8 +125,8 @@ class PipelineExecutorThread(threading.Thread): """ - Run pipeline reoeatedly on inputs for max_time seconds, pushing the timer data to - the timer queue to store the runtime of each section of the pipeline. 
+ Run pipeline reoeatedly on inputs for max_time seconds, storing the runtime of each + section of the pipeline in its timer manager For intended usage, see multistream_benchmark """ @@ -137,42 +135,32 @@ def __init__( self, pipeline: Pipeline, inputs: List[any], - time_queue: queue.Queue, max_time: float, ): super(PipelineExecutorThread, self).__init__() self._pipeline = pipeline self._inputs = inputs - self._time_queue = time_queue self._max_time = max_time def run(self): while time.perf_counter() < self._max_time: _ = self._pipeline(self._inputs) - for timer in self._pipeline.timer_manager.timers: - self._time_queue.put(timer) def singlestream_benchmark( pipeline: Pipeline, inputs: List[any], seconds_to_run: float -) -> List[StagedTimer]: +): """ Run pipeline repeatedly on inputs for max_time seconds, storing the runtime of each - section of the pipeline in batch_timings + section of the pipeline in its timer manager :param pipeline: pipeline to execute :param inputs: inputs to pass through pipeline :param seconds_to_run: how long to run pipeline for - :return: list of timings for each forward pass """ benchmark_end_time = time.perf_counter() + seconds_to_run - batch_timings = [] while time.perf_counter() < benchmark_end_time: _ = pipeline(inputs) - for timer in pipeline.timer_manager.timers: - batch_timings.append(timer) - - return batch_timings def multistream_benchmark( @@ -180,17 +168,21 @@ def multistream_benchmark( inputs: List[any], seconds_to_run: float, num_streams: int, -) -> List[StagedTimer]: +): """ Create num_streams threads, each of which calls PipelineExecutorThread.run() for - seconds_to_run seconds. Stores all timing info in a shared queue. + seconds_to_run seconds. All timing info stored in pipeline.timer_manager + + :param pipeline: pipeline to execute + :param inputs: inputs to pass through pipeline + :param seconds_to_run: how long to run pipeline for + :param num_streams: number of threads to launch """ - time_queue = queue.Queue() max_time = time.perf_counter() + seconds_to_run threads = [] for thread in range(num_streams): - threads.append(PipelineExecutorThread(pipeline, inputs, time_queue, max_time)) + threads.append(PipelineExecutorThread(pipeline, inputs, max_time)) for thread in threads: thread.start() # triggers PipelineExecutorThread.run() @@ -198,8 +190,6 @@ def multistream_benchmark( for thread in threads: thread.join() - return list(time_queue.queue) - def create_input_schema( pipeline: Pipeline, @@ -255,7 +245,7 @@ def benchmark_pipeline( thread_pinning: str = "core", engine: str = DEEPSPARSE_ENGINE, quiet: bool = False, -) -> Tuple[List[StagedTimer], float]: +) -> Tuple[Dict[str, List[float]], float]: """ Run a benchmark over the specified pipeline, tracking timings for pre-processing, forward pass and post-processing. 
Results are printed to the console and optionally @@ -273,7 +263,7 @@ def benchmark_pipeline( :param thread_pinning: enable binding threads to cores :param engine: inference engine, deepsparse or onnxruntime :param quiet: lower logging verbosity - :return: list of StagedTimer objects for each forward pass and the total run time + :return: dictionary of section times for each forward pass and the total run time """ if quiet: @@ -289,6 +279,7 @@ def benchmark_pipeline( input_type = config.data_type kwargs = config.pipeline_kwargs + kwargs["benchmark"] = True pipeline = Pipeline.create( task=task, model_path=model_path, @@ -302,25 +293,25 @@ def benchmark_pipeline( if scenario == "singlestream": singlestream_benchmark(pipeline, inputs, warmup_time) + pipeline.timer_manager.clear() start_time = time.perf_counter() - batch_times = singlestream_benchmark(pipeline, inputs, seconds_to_run) + singlestream_benchmark(pipeline, inputs, seconds_to_run) elif scenario == "multistream": multistream_benchmark(pipeline, inputs, warmup_time, num_streams) + pipeline.timer_manager.clear() start_time = time.perf_counter() - batch_times = multistream_benchmark( - pipeline, inputs, seconds_to_run, num_streams - ) + multistream_benchmark(pipeline, inputs, seconds_to_run, num_streams) elif scenario == "elastic": multistream_benchmark(pipeline, inputs, warmup_time, num_streams) + pipeline.timer_manager.clear() start_time = time.perf_counter() - batch_times = multistream_benchmark( - pipeline, inputs, seconds_to_run, num_streams - ) + multistream_benchmark(pipeline, inputs, seconds_to_run, num_streams) else: raise Exception(f"Unknown scenario '{scenario}'") end_time = time.perf_counter() total_run_time = end_time - start_time + batch_times = pipeline.timer_manager.all_times if len(batch_times) == 0: raise Exception( "Generated no batch timings, try extending benchmark time with '--time'" @@ -350,20 +341,13 @@ def calculate_statistics( def calculate_section_stats( - batch_times: List[StagedTimer], total_run_time: float, num_streams: int + batch_times: Dict[str, List[float]], total_run_time: float, num_streams: int ) -> Dict[str, Dict]: total_run_time_ms = total_run_time * 1000 - section_times = {} - for timer in batch_times: - for section in timer.stages: - if section not in section_times: - section_times[section] = [] - section_times[section].append(timer.times[section] * 1000) - sections = {} - for section_name in section_times: - times = section_times[section_name] + for section_name in batch_times: + times = [t * 1000 for t in batch_times[section_name]] sections[section_name] = calculate_statistics( times, total_run_time_ms, num_streams ) @@ -513,7 +497,7 @@ def main( ) section_stats = calculate_section_stats(batch_times, total_run_time, num_streams) - items_per_sec = (len(batch_times) * batch_size) / total_run_time + items_per_sec = (len(batch_times["total_inference"]) * batch_size) / total_run_time benchmark_results = { "items_per_sec": items_per_sec, diff --git a/src/deepsparse/utils/timer.py b/src/deepsparse/utils/timer.py index 1dcaf77acf..56a3452b6e 100644 --- a/src/deepsparse/utils/timer.py +++ b/src/deepsparse/utils/timer.py @@ -338,6 +338,10 @@ def all_times(self) -> Dict[str, List[float]]: return all_times + def clear(self): + for t in self._timers: + t.clear() + @contextmanager def new_timer_context(self, total_inference: bool = True) -> StagedTimer: """ diff --git a/tests/test_pipeline_benchmark.py b/tests/test_pipeline_benchmark.py index 64cf717765..d083beed5a 100644 --- 
a/tests/test_pipeline_benchmark.py +++ b/tests/test_pipeline_benchmark.py @@ -28,7 +28,7 @@ generate_text_data, get_input_schema_type, ) -from deepsparse.utils import StagedTimer +from deepsparse.utils import StagedTimer, TimerManager from tests.helpers import run_command @@ -97,6 +97,7 @@ def test_pipeline_benchmark( assert res.returncode == 0 assert "error" not in res.stdout.lower() assert "fail" not in res.stdout.lower() + assert "total_inference" in res.stdout.lower() def test_generate_image_data(): @@ -152,6 +153,7 @@ def test_get_input_schema_type(task_name, input_schema): def test_calculations(): batch_times = [] + timer_manager = TimerManager() for i in range(5): timer = StagedTimer() timer._staged_start_times["stage_1"] = [i + 0.1] @@ -160,8 +162,9 @@ def test_calculations(): timer._staged_start_times["stage_2"] = [i + 0.6] timer._staged_stop_times["stage_2"] = [i + 0.9] - batch_times.append(timer) + timer_manager._timers.append(timer) + batch_times = timer_manager.all_times total_run_time = 6.0 section_stats = calculate_section_stats(batch_times, total_run_time, 1) assert math.isclose( From 1eb3202e0afa16fdde10e6449147da1fc6f89e4e Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Tue, 15 Aug 2023 12:41:55 -0400 Subject: [PATCH 35/37] PR comments --- .../benchmark/benchmark_pipeline.py | 14 ++++----- src/deepsparse/benchmark/data_creation.py | 29 ++++++++----------- src/deepsparse/benchmark/helpers.py | 29 +++++++++++++++---- tests/test_pipeline_benchmark.py | 18 ++++++------ 4 files changed, 52 insertions(+), 38 deletions(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 697d24b947..88283a7dec 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -95,9 +95,9 @@ from deepsparse.benchmark.config import PipelineBenchmarkConfig, PipelineInputType from deepsparse.benchmark.data_creation import ( SchemaType, - generate_image_data, - generate_question_data, - generate_text_data, + generate_random_image_data, + generate_random_question_data, + generate_random_text_data, get_input_schema_type, load_image_data, load_question_data, @@ -202,16 +202,16 @@ def create_input_schema( if input_type == PipelineInputType.DUMMY: if input_schema_requirement == SchemaType.IMAGE: - input_data = generate_image_data(config, batch_size) + input_data = generate_random_image_data(config, batch_size) inputs = pipeline.input_schema(images=input_data, **kwargs) elif input_schema_requirement == SchemaType.TEXT_SEQ: - input_data = generate_text_data(config, batch_size) + input_data = generate_random_text_data(config, batch_size) inputs = pipeline.input_schema(sequences=input_data, **kwargs) elif input_schema_requirement == SchemaType.TEXT_INPUT: - input_data = generate_text_data(config, batch_size) + input_data = generate_random_text_data(config, batch_size) inputs = pipeline.input_schema(inputs=input_data, **kwargs) elif input_schema_requirement == SchemaType.QUESTION: - question, context = generate_question_data(config, batch_size) + question, context = generate_random_question_data(config, batch_size) inputs = pipeline.input_schema(question=question, context=context, **kwargs) elif input_type == PipelineInputType.REAL: if input_schema_requirement == SchemaType.IMAGE: diff --git a/src/deepsparse/benchmark/data_creation.py b/src/deepsparse/benchmark/data_creation.py index 0f0849280f..7c6c378b1b 100644 --- a/src/deepsparse/benchmark/data_creation.py +++ 
b/src/deepsparse/benchmark/data_creation.py @@ -32,11 +32,11 @@ __all__ = [ "get_input_schema_type", - "generate_image_data", + "generate_random_image_data", "load_image_data", - "generate_text_data", + "generate_random_text_data", "load_text_data", - "generate_question_data", + "generate_random_question_data", "load_question_data", ] @@ -86,7 +86,7 @@ def get_files_with_endings( return random.sample(files, num_files) -def generate_sentence(string_length: int, avg_word_length: int = 5): +def generate_random_sentence(string_length: int, avg_word_length: int = 5): random_chars = "".join(random.choices(string.ascii_letters, k=string_length)) space_locations = random.sample( range(string_length), int(string_length / avg_word_length) @@ -97,7 +97,7 @@ def generate_sentence(string_length: int, avg_word_length: int = 5): return "".join(random_chars) -def generate_image_data( +def generate_random_image_data( config: PipelineBenchmarkConfig, batch_size: int ) -> List[numpy.ndarray]: input_data = [] @@ -107,15 +107,10 @@ def generate_image_data( image_shape = DEFAULT_IMAGE_SHAPE _LOGGER.warning( f"Could not parse {config.input_image_shape}, " - "Using default image shape {image_shape}" + f"Using default image shape {image_shape}" ) - for _ in range(batch_size): - rand_array = numpy.random.randint(0, high=255, size=image_shape).astype( - numpy.uint8 - ) - input_data.append(rand_array) - + input_data = [numpy.random.randint(0, high=255, size=image_shape).astype(numpy.uint8) for _ in range(batch_size)] return input_data @@ -129,7 +124,7 @@ def load_image_data(config: PipelineBenchmarkConfig, batch_size: int) -> List[st ) -def generate_text_data( +def generate_random_text_data( config: PipelineBenchmarkConfig, batch_size: int, avg_word_len=5 ) -> List[str]: if config.gen_sequence_length: @@ -139,7 +134,7 @@ def generate_text_data( _LOGGER.warning("Ssing default string length %d" % string_length) input_data = [ - generate_sentence(string_length, avg_word_length=avg_word_len) + generate_random_sentence(string_length, avg_word_length=avg_word_len) for _ in range(batch_size) ] return input_data @@ -166,7 +161,7 @@ def load_text_data(config: PipelineBenchmarkConfig, batch_size: int) -> List[str return input_data -def generate_question_data( +def generate_random_question_data( config: PipelineBenchmarkConfig, batch_size: int, avg_word_len=5 ) -> Tuple[str, str]: if batch_size != 1: @@ -178,8 +173,8 @@ def generate_question_data( else: string_length = DEFAULT_STRING_LENGTH _LOGGER.warning("Using default string length %d" % string_length) - question = generate_sentence(string_length, avg_word_length=avg_word_len) - context = generate_sentence(string_length, avg_word_length=avg_word_len) + question = generate_random_sentence(string_length, avg_word_length=avg_word_len) + context = generate_random_sentence(string_length, avg_word_length=avg_word_len) return (question, context) diff --git a/src/deepsparse/benchmark/helpers.py b/src/deepsparse/benchmark/helpers.py index ef6b6c92a5..baa36b47ba 100644 --- a/src/deepsparse/benchmark/helpers.py +++ b/src/deepsparse/benchmark/helpers.py @@ -36,17 +36,29 @@ DEFAULT_STRING_LENGTH = 50 DEFAULT_IMAGE_SHAPE = (240, 240, 3) +class ThreadPinningMode: + CORE: str = "core" + NUMA: str = "numa" + NONE: str = "none" + def decide_thread_pinning(pinning_mode: str) -> None: + """ + Enable binding threads to cores ('core' the default), threads to cores on sockets + ('numa'), or disable ('none')" + + :param pinning_mode: thread pinning mode to use + :return: None + """ pinning_mode = 
pinning_mode.lower() - if pinning_mode in "core": + if pinning_mode == ThreadPinningMode.CORE: os.environ["NM_BIND_THREADS_TO_CORES"] = "1" _LOGGER.info("Thread pinning to cores enabled") - elif pinning_mode in "numa": + elif pinning_mode == ThreadPinningMode.NUMA: os.environ["NM_BIND_THREADS_TO_CORES"] = "0" os.environ["NM_BIND_THREADS_TO_SOCKETS"] = "1" _LOGGER.info("Thread pinning to socket/numa nodes enabled") - elif pinning_mode in "none": + elif pinning_mode in ThreadPinningMode.NONE: os.environ["NM_BIND_THREADS_TO_CORES"] = "0" os.environ["NM_BIND_THREADS_TO_SOCKETS"] = "0" _LOGGER.info("Thread pinning disabled, performance may be sub-optimal") @@ -57,6 +69,12 @@ def decide_thread_pinning(pinning_mode: str) -> None: def parse_scheduler(scenario: str) -> Scheduler: + """ + Returns a threading scheduler based on desired scenario + + :param scenario: scheduling scenario to use + :return: scehduler with desred scenario + """ scenario = scenario.lower() if scenario == "multistream": return Scheduler.multi_stream @@ -77,7 +95,7 @@ def parse_scenario(scenario: str) -> str: elif scenario == "elastic": return "elastic" else: - _LOGGER.info( + _LOGGER.warning( "Recieved invalid option for scenario'%s', defaulting to async" % scenario ) return "multistream" @@ -96,7 +114,7 @@ def parse_num_streams(num_streams: int, num_cores: int, scenario: str): return num_streams else: default_num_streams = max(1, int(num_cores / 2)) - _LOGGER.info( + _LOGGER.warning( "num_streams default value chosen of %d. " "This requires tuning and may be sub-optimal" % default_num_streams ) @@ -111,3 +129,4 @@ def parse_input_config(input_config_file: str) -> Dict[str, any]: return PipelineBenchmarkConfig(**config) except ValidationError as e: _LOGGER.error(e) + raise e diff --git a/tests/test_pipeline_benchmark.py b/tests/test_pipeline_benchmark.py index d083beed5a..ad685e9c77 100644 --- a/tests/test_pipeline_benchmark.py +++ b/tests/test_pipeline_benchmark.py @@ -23,9 +23,9 @@ from deepsparse.benchmark.config import PipelineBenchmarkConfig from deepsparse.benchmark.data_creation import ( SchemaType, - generate_image_data, - generate_question_data, - generate_text_data, + generate_random_image_data, + generate_random_question_data, + generate_random_text_data, get_input_schema_type, ) from deepsparse.utils import StagedTimer, TimerManager @@ -100,11 +100,11 @@ def test_pipeline_benchmark( assert "total_inference" in res.stdout.lower() -def test_generate_image_data(): +def test_generate_random_image_data(): batch_size = 32 config_args = {"input_image_shape": (600, 600, 1)} config = PipelineBenchmarkConfig(**config_args) - image_data = generate_image_data(config, batch_size) + image_data = generate_random_image_data(config, batch_size) assert len(image_data) == batch_size img = image_data[0] assert img.shape == (600, 600, 1) @@ -112,12 +112,12 @@ def test_generate_image_data(): assert numpy.max(img) < 255 and numpy.min(img) >= 0 -def test_generate_text_data(): +def test_generate_random_text_data(): batch_size = 16 avg_word_len = 8 config_args = {"gen_sequence_length": 250} config = PipelineBenchmarkConfig(**config_args) - text_data = generate_text_data(config, batch_size, avg_word_len=avg_word_len) + text_data = generate_random_text_data(config, batch_size, avg_word_len=avg_word_len) assert len(text_data) == batch_size text = text_data[0] assert len(text) == 250 @@ -125,11 +125,11 @@ def test_generate_text_data(): assert num_spaces == int(len(text) / avg_word_len) -def test_generate_question_data(): +def 
test_generate_random_question_data(): avg_word_len = 10 config_args = {"gen_sequence_length": 50} config = PipelineBenchmarkConfig(**config_args) - question, context = generate_question_data(config, 1, avg_word_len=avg_word_len) + question, context = generate_random_question_data(config, 1, avg_word_len=avg_word_len) assert len(question) == config.gen_sequence_length assert len(context) == config.gen_sequence_length num_q_spaces = question.count(" ") From 289f545b66c98c6b07951e1f2e3dd1d3b842281f Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Tue, 15 Aug 2023 12:42:20 -0400 Subject: [PATCH 36/37] style --- src/deepsparse/benchmark/data_creation.py | 5 ++++- src/deepsparse/benchmark/helpers.py | 1 + tests/test_pipeline_benchmark.py | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/deepsparse/benchmark/data_creation.py b/src/deepsparse/benchmark/data_creation.py index 7c6c378b1b..0b6c921b9d 100644 --- a/src/deepsparse/benchmark/data_creation.py +++ b/src/deepsparse/benchmark/data_creation.py @@ -110,7 +110,10 @@ def generate_random_image_data( f"Using default image shape {image_shape}" ) - input_data = [numpy.random.randint(0, high=255, size=image_shape).astype(numpy.uint8) for _ in range(batch_size)] + input_data = [ + numpy.random.randint(0, high=255, size=image_shape).astype(numpy.uint8) + for _ in range(batch_size) + ] return input_data diff --git a/src/deepsparse/benchmark/helpers.py b/src/deepsparse/benchmark/helpers.py index baa36b47ba..703dafa92c 100644 --- a/src/deepsparse/benchmark/helpers.py +++ b/src/deepsparse/benchmark/helpers.py @@ -36,6 +36,7 @@ DEFAULT_STRING_LENGTH = 50 DEFAULT_IMAGE_SHAPE = (240, 240, 3) + class ThreadPinningMode: CORE: str = "core" NUMA: str = "numa" diff --git a/tests/test_pipeline_benchmark.py b/tests/test_pipeline_benchmark.py index ad685e9c77..485599d044 100644 --- a/tests/test_pipeline_benchmark.py +++ b/tests/test_pipeline_benchmark.py @@ -129,7 +129,9 @@ def test_generate_random_question_data(): avg_word_len = 10 config_args = {"gen_sequence_length": 50} config = PipelineBenchmarkConfig(**config_args) - question, context = generate_random_question_data(config, 1, avg_word_len=avg_word_len) + question, context = generate_random_question_data( + config, 1, avg_word_len=avg_word_len + ) assert len(question) == config.gen_sequence_length assert len(context) == config.gen_sequence_length num_q_spaces = question.count(" ") From 749a7521447435dd0b0af53e1009b419892f07f7 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Tue, 15 Aug 2023 12:53:03 -0400 Subject: [PATCH 37/37] PR comments --- src/deepsparse/benchmark/benchmark_pipeline.py | 4 +++- src/deepsparse/benchmark/data_creation.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/deepsparse/benchmark/benchmark_pipeline.py b/src/deepsparse/benchmark/benchmark_pipeline.py index 88283a7dec..373e6257bb 100644 --- a/src/deepsparse/benchmark/benchmark_pipeline.py +++ b/src/deepsparse/benchmark/benchmark_pipeline.py @@ -79,6 +79,8 @@ zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/ bigpython_bigquery_thepile/pruned50-none \ -c config.json -t 30 -s async + +Refer to README for config.json examples """ import json @@ -125,7 +127,7 @@ class PipelineExecutorThread(threading.Thread): """ - Run pipeline reoeatedly on inputs for max_time seconds, storing the runtime of each + Run pipeline repeatedly on inputs for max_time seconds, storing the runtime of each section of the pipeline in its timer manager For intended usage, see 
multistream_benchmark diff --git a/src/deepsparse/benchmark/data_creation.py b/src/deepsparse/benchmark/data_creation.py index 0b6c921b9d..c44a2a455c 100644 --- a/src/deepsparse/benchmark/data_creation.py +++ b/src/deepsparse/benchmark/data_creation.py @@ -25,11 +25,6 @@ from deepsparse.benchmark.config import PipelineBenchmarkConfig -_LOGGER = logging.getLogger(__name__) - -DEFAULT_STRING_LENGTH = 50 -DEFAULT_IMAGE_SHAPE = (240, 240, 3) - __all__ = [ "get_input_schema_type", "generate_random_image_data", @@ -40,6 +35,11 @@ "load_question_data", ] +_LOGGER = logging.getLogger(__name__) + +DEFAULT_STRING_LENGTH = 50 +DEFAULT_IMAGE_SHAPE = (240, 240, 3) + class SchemaType: IMAGE: str = "images" @@ -72,7 +72,7 @@ def get_input_schema_type(pipeline: Pipeline) -> str: raise Exception("Unknown schema requirement {}".format(input_schema_requirements)) -def get_files_with_endings( +def get_files_with_suffixes( folder: str, num_files: int, recursive: bool, file_endings: Tuple[str] ) -> List[str]: if not path.exists(folder): @@ -122,7 +122,7 @@ def load_image_data(config: PipelineBenchmarkConfig, batch_size: int) -> List[st raise Exception("Data folder must be defined for real inputs") path_to_data = config.data_folder recursive_search = config.recursive_search - return get_files_with_endings( + return get_files_with_suffixes( path_to_data, batch_size, recursive_search, (".jpg", ".jpeg", ".gif") ) @@ -148,7 +148,7 @@ def load_text_data(config: PipelineBenchmarkConfig, batch_size: int) -> List[str raise Exception("Data folder must be defined for real inputs") path_to_data = config.data_folder recursive_search = config.recursive_search - input_files = get_files_with_endings( + input_files = get_files_with_suffixes( path_to_data, batch_size, recursive_search, (".txt") ) if config.max_string_length:
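
For readers who want to drive the benchmark from Python rather than through the `deepsparse.benchmark_pipeline` CLI, the sketch below shows one way to call the entry points introduced in this series. It is illustrative only, not part of the patches themselves, and assumes the series is applied as-is: the import paths, keyword arguments, and three-value return unpacking mirror what `main()` does in `benchmark_pipeline.py`, the SparseZoo stub is reused from the README example above, and the config values are arbitrary placeholders.

```python
from deepsparse.benchmark.benchmark_pipeline import (
    benchmark_pipeline,
    calculate_section_stats,
)
from deepsparse.benchmark.config import PipelineBenchmarkConfig

# Dummy text inputs of 128 generated characters, as in the config.json examples
config = PipelineBenchmarkConfig(data_type="dummy", gen_sequence_length=128)

# Keyword names mirror the call made in main(); the stub can be swapped for a
# local ONNX model path
batch_times, total_run_time, num_streams = benchmark_pipeline(
    model_path="zoo:nlp/sentiment_analysis/distilbert-none/pytorch/huggingface/sst2/pruned90-none",
    task="text_classification",
    config=config,
    batch_size=1,
    scenario="sync",
    seconds_to_run=10,
)

# Same per-section statistics the CLI prints; mean latencies are in ms/batch
section_stats = calculate_section_stats(batch_times, total_run_time, num_streams)
print(section_stats["total_inference"]["mean"])
```

The same `PipelineBenchmarkConfig` accepts the other fields defined in `config.py` (for example `data_folder`, `recursive_search`, and `max_string_length` for real data), so the dummy configuration here can be swapped for a real-data one without changing the call.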