From 292d6ad2cc4d7d7d0c3ae7808aede826514b4897 Mon Sep 17 00:00:00 2001 From: "Navulla, Vamshi Krishna" Date: Wed, 1 May 2024 18:58:48 +0000 Subject: [PATCH 01/23] Merge changes from intel-sandbox/serve --- .../org/pytorch/serve/util/ConfigManager.java | 122 +++++++++------ ts/metrics/system_metrics.py | 140 ++++++++++++++---- ts/torch_handler/base_handler.py | 20 ++- 3 files changed, 201 insertions(+), 81 deletions(-) diff --git a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java index 9c0657c14a..2701f3dbb9 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java +++ b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java @@ -235,6 +235,7 @@ private ConfigManager(Arguments args) throws IOException { prop.setProperty(TS_LOAD_MODELS, String.join(",", models)); } + prop.setProperty( TS_NUMBER_OF_GPU, String.valueOf( @@ -242,6 +243,7 @@ private ConfigManager(Arguments args) throws IOException { getAvailableGpu(), getIntProperty(TS_NUMBER_OF_GPU, Integer.MAX_VALUE)))); + String pythonExecutable = args.getPythonExecutable(); if (pythonExecutable != null) { prop.setProperty(PYTHON_EXECUTABLE, pythonExecutable); @@ -429,8 +431,9 @@ public int getJobQueueSize() { } public int getNumberOfGpu() { + // return 1; return getIntProperty(TS_NUMBER_OF_GPU, 0); - } + } public String getMetricsConfigPath() { String path = getCanonicalPath(prop.getProperty(TS_METRICS_CONFIG)); @@ -582,7 +585,7 @@ public String getCertificateFile() { public String getSystemMetricsCmd() { return prop.getProperty(SYSTEM_METRICS_CMD, ""); } - + public SslContext getSslContext() throws IOException, GeneralSecurityException { List supportedCiphers = Arrays.asList( @@ -846,58 +849,85 @@ private static String getCanonicalPath(String path) { } private static int getAvailableGpu() { - try { - List gpuIds = new ArrayList<>(); - String visibleCuda = System.getenv("CUDA_VISIBLE_DEVICES"); - if (visibleCuda != null && !visibleCuda.isEmpty()) { - String[] ids = visibleCuda.split(","); - for (String id : ids) { - gpuIds.add(Integer.parseInt(id)); - } - } else if (System.getProperty("os.name").startsWith("Mac")) { - Process process = Runtime.getRuntime().exec("system_profiler SPDisplaysDataType"); - int ret = process.waitFor(); - if (ret != 0) { - return 0; - } + try { + System.out.println("getAvailableGpu 1"); + + List gpuIds = new ArrayList<>(); + String visibleCuda = System.getenv("CUDA_VISIBLE_DEVICES"); + if (visibleCuda != null && !visibleCuda.isEmpty()) { + String[] ids = visibleCuda.split(","); + for (String id : ids) { + gpuIds.add(Integer.parseInt(id)); + } + } else if (System.getProperty("os.name").startsWith("Mac")) { + Process process = Runtime.getRuntime().exec("system_profiler SPDisplaysDataType"); + int ret = process.waitFor(); + if (ret != 0) { + return 0; + } - BufferedReader reader = - new BufferedReader(new InputStreamReader(process.getInputStream())); - String line; - while ((line = reader.readLine()) != null) { - if (line.contains("Chipset Model:") && !line.contains("Apple M1")) { + BufferedReader reader = + new BufferedReader(new InputStreamReader(process.getInputStream())); + String line; + while ((line = reader.readLine()) != null) { + if (line.contains("Chipset Model:") && !line.contains("Apple M1")) { + return 0; + } + if (line.contains("Total Number of Cores:")) { + String[] parts = line.split(":"); + if (parts.length >= 2) { + return (Integer.parseInt(parts[1].trim())); + } 
+ } + } + throw new AssertionError("Unexpected response."); + } else { + + System.out.println("getAvailableGpu 2"); + + try { + Process process = + Runtime.getRuntime().exec("nvidia-smi --query-gpu=index --format=csv"); + int ret = process.waitFor(); + if (ret != 0) { return 0; } - if (line.contains("Total Number of Cores:")) { - String[] parts = line.split(":"); - if (parts.length >= 2) { - return (Integer.parseInt(parts[1].trim())); + List list = + IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); + if (list.isEmpty() || !"index".equals(list.get(0))) { + throw new AssertionError("Unexpected nvidia-smi response."); + } + for (int i = 1; i < list.size(); i++) { + gpuIds.add(Integer.parseInt(list.get(i))); + } + }catch (IOException | InterruptedException e) { + System.out.println("nvidia-smi not available or failed: " + e.getMessage()); + } + System.out.println("getAvailableGpu 3"); + Process process = Runtime.getRuntime().exec("xpu-smi discovery --dump 1"); + int ret = process.waitFor(); + if (ret != 0) { + return 0; } + System.out.println("Checking for Intel GPUs"); + List list = + IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); + if (list.isEmpty() || !list.get(0).contains("Device ID")) { + throw new AssertionError("Unexpected xpu-smi response."); } + for (int i = 1; i < list.size(); i++) { + gpuIds.add(Integer.parseInt(list.get(i))); + } + + + } - throw new AssertionError("Unexpected response."); - } else { - Process process = - Runtime.getRuntime().exec("nvidia-smi --query-gpu=index --format=csv"); - int ret = process.waitFor(); - if (ret != 0) { - return 0; - } - List list = - IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); - if (list.isEmpty() || !"index".equals(list.get(0))) { - throw new AssertionError("Unexpected nvidia-smi response."); - } - for (int i = 1; i < list.size(); i++) { - gpuIds.add(Integer.parseInt(list.get(i))); - } + System.out.println("Number of GPUs found :"+ gpuIds.size()); + return gpuIds.size(); + } catch (IOException | InterruptedException e) { + return 0; } - - return gpuIds.size(); - } catch (IOException | InterruptedException e) { - return 0; } - } public List getAllowedUrls() { String allowedURL = prop.getProperty(TS_ALLOWED_URLS, DEFAULT_TS_ALLOWED_URLS); diff --git a/ts/metrics/system_metrics.py b/ts/metrics/system_metrics.py index e0a21f1c4f..7b5633fd5a 100644 --- a/ts/metrics/system_metrics.py +++ b/ts/metrics/system_metrics.py @@ -4,9 +4,10 @@ import logging import types from builtins import str +import time import psutil - +import subprocess from ts.metrics.dimension import Dimension from ts.metrics.metric import Metric @@ -61,42 +62,119 @@ def gpu_utilization(num_of_gpu): # pylint: disable=wrong-import-position # pylint: disable=import-outside-toplevel - import nvgpu - import pynvml - from nvgpu import list_gpus + # import nvgpu + # import pynvml + # from nvgpu import list_gpus # pylint: enable=wrong-import-position # pylint: enable=import-outside-toplevel - info = nvgpu.gpu_info() - for value in info: - dimension_gpu = [ - Dimension("Level", "Host"), - Dimension("device_id", value["index"]), - ] - system_metrics.append( - Metric( - "GPUMemoryUtilization", - value["mem_used_percent"], - "percent", - dimension_gpu, - ) - ) - system_metrics.append( - Metric("GPUMemoryUsed", value["mem_used"], "MB", dimension_gpu) - ) - + # info = nvgpu.gpu_info() + # for value in info: + # dimension_gpu = [ + # Dimension("Level", "Host"), + # Dimension("device_id", value["index"]), + # ] + # 
system_metrics.append( + # Metric( + # "GPUMemoryUtilization", + # value["mem_used_percent"], + # "percent", + # dimension_gpu, + # ) + # ) + # system_metrics.append( + # Metric("GPUMemoryUsed", value["mem_used"], "MB", dimension_gpu) + # ) + + # try: + # statuses = list_gpus.device_statuses() + # except pynvml.nvml.NVMLError_NotSupported: + # logging.error("gpu device monitoring not supported") + # statuses = [] + + # for idx, status in enumerate(statuses): + # dimension_gpu = [Dimension("Level", "Host"), Dimension("device_id", idx)] + # system_metrics.append( + # Metric("GPUUtilization", status["utilization"], "percent", dimension_gpu) + # ) + + logging.info(f"XPU Utillization: {num_of_gpu}") + start_time = time.time() + timeout = 1 try: - statuses = list_gpus.device_statuses() - except pynvml.nvml.NVMLError_NotSupported: - logging.error("gpu device monitoring not supported") - statuses = [] - - for idx, status in enumerate(statuses): - dimension_gpu = [Dimension("Level", "Host"), Dimension("device_id", idx)] - system_metrics.append( - Metric("GPUUtilization", status["utilization"], "percent", dimension_gpu) + # Run the xpu-smi command to get GPU metrics + process = subprocess.Popen( + ["xpu-smi", "dump", "-d", "0", "-m", "0,5"], + stdout=subprocess.PIPE, + text=True # Ensures that output is in text form ) + output_lines = [] + while True: + current_time = time.time() + if current_time - start_time > timeout: + break + + # Try to read a line of output + lines = process.stdout.readline() + if not lines: + break + output_lines.append(lines.strip()) + + # You can process lines here or later + print(lines.strip()) # Example of processing output in real-time + + # Parse the output to extract GPU metrics + headers = output_lines[0].split(', ') + data_lines = output_lines[1:] + for line in data_lines: + values = line.split(', ') + if len(values) != len(headers): + logging.error(f"Data format error in line: {line}") + continue + + # Create a dictionary for easy access to each column + data_dict = dict(zip(headers, values)) + + # Extract necessary data + timestamp = data_dict["Timestamp"] + device_id = data_dict["DeviceId"] + gpu_utilization = data_dict["GPU Utilization (%)"] + memory_utilization = data_dict["GPU Memory Utilization (%)"] + + # Create dimensions + dimensions = [ + Dimension("Level", "Host"), + Dimension("DeviceId", device_id) + ] + + # Append GPU Utilization Metric + system_metrics.append( + Metric( + "GPUUtilization", + gpu_utilization, + "percent", + dimensions + ) + ) + + # Append GPU Memory Utilization Metric + system_metrics.append( + Metric( + "GPUMemoryUtilization", + memory_utilization, + "percent", + dimensions + ) + ) + # logging.info(f"Added metric: {system_metrics[-1]}") + # logging.info(f"Added metric: {system_metrics[-2]}") + + + except FileNotFoundError: + logging.error("xpu-smi command not found. 
Cannot collect Intel GPU metrics.") + except subprocess.CalledProcessError as e: + logging.error("Error running xpu-smi command: %s", e) def collect_all(mod, num_of_gpu): diff --git a/ts/torch_handler/base_handler.py b/ts/torch_handler/base_handler.py index ad33de7c48..23118710e9 100644 --- a/ts/torch_handler/base_handler.py +++ b/ts/torch_handler/base_handler.py @@ -20,6 +20,7 @@ load_label_mapping, ) + if packaging.version.parse(torch.__version__) >= packaging.version.parse("1.8.1"): from torch.profiler import ProfilerActivity, profile, record_function @@ -61,8 +62,11 @@ if os.environ.get("TS_IPEX_ENABLE", "false") == "true": try: import intel_extension_for_pytorch as ipex - + print("Succesfully imported ipex") IPEX_AVAILABLE = True + if torch.xpu.is_available(): + IPEX_GPU = True + logger.info("Torch support for Intel GPU enabled") except ImportError as error: logger.warning( "IPEX is enabled but intel-extension-for-pytorch is not installed. Proceeding without IPEX." @@ -71,7 +75,7 @@ else: IPEX_AVAILABLE = False - + try: import onnxruntime as ort import psutil @@ -139,7 +143,7 @@ def initialize(self, context): RuntimeError: Raises the Runtime error when the model.py is missing """ - + if context is not None and hasattr(context, "model_yaml_config"): self.model_yaml_config = context.model_yaml_config @@ -248,6 +252,9 @@ def initialize(self, context): elif IPEX_AVAILABLE: self.model = self.model.to(memory_format=torch.channels_last) + if IPEX_GPU: + self.model = self.model.to("xpu") # IPEX GPU + logger.info("Model loaded on GPU") self.model = ipex.optimize(self.model) logger.info(f"Compiled model with ipex") @@ -362,7 +369,12 @@ def inference(self, data, *args, **kwargs): Torch Tensor : The Predicted Torch Tensor is returned in this function. """ with torch.inference_mode(): - marshalled_data = data.to(self.device) + if IPEX_AVAILABLE and IPEX_GPU: # IPEX GPU + logger.info("GPU Enabled") + marshalled_data = data.to("xpu") + # print(marshalled_data, "Data on Device") + else: + marshalled_data = data.to(self.device) results = self.model(marshalled_data, *args, **kwargs) return results From 88bea0d3a37c65bbba1721ccf1313da0b63d6bd6 Mon Sep 17 00:00:00 2001 From: "Navulla, Vamshi Krishna" Date: Thu, 2 May 2024 22:46:13 +0000 Subject: [PATCH 02/23] ipex_gpu_enable - New config in config.properties --- .../main/java/org/pytorch/serve/util/ConfigManager.java | 9 ++++++++- ts/torch_handler/base_handler.py | 7 +++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java index 2701f3dbb9..43d57de4bc 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java +++ b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java @@ -79,6 +79,7 @@ public final class ConfigManager { private static final String TS_IPEX_ENABLE = "ipex_enable"; private static final String TS_CPU_LAUNCHER_ENABLE = "cpu_launcher_enable"; private static final String TS_CPU_LAUNCHER_ARGS = "cpu_launcher_args"; + private static final String TS_IPEX_GPU_ENABLE = "ipex_gpu_enable"; private static final String TS_ASYNC_LOGGING = "async_logging"; private static final String TS_CORS_ALLOWED_ORIGIN = "cors_allowed_origin"; @@ -144,7 +145,7 @@ public final class ConfigManager { private static Pattern pattern = Pattern.compile("\\$\\$([^$]+[^$])\\$\\$"); private Pattern blacklistPattern; - private Properties prop; + private Properties prop; private 
boolean snapshotDisabled; @@ -418,6 +419,11 @@ public String getCPULauncherArgs() { return getProperty(TS_CPU_LAUNCHER_ARGS, null); } + public boolean isIPEXGpuEnabled() { + return Boolean.parseBoolean(getProperty(TS_IPEX_GPU_ENABLE, "false")); + } + + public int getNettyThreads() { return getIntProperty(TS_NUMBER_OF_NETTY_THREADS, 0); } @@ -830,6 +836,7 @@ public HashMap getBackendConfiguration() { // Append properties used by backend worker here config.put("TS_DECODE_INPUT_REQUEST", prop.getProperty(TS_DECODE_INPUT_REQUEST, "true")); config.put("TS_IPEX_ENABLE", prop.getProperty(TS_IPEX_ENABLE, "false")); + config.put("TS_IPEX_GPU_ENABLE", prop.getProperty(TS_IPEX_GPU_ENABLE, "false")); return config; } diff --git a/ts/torch_handler/base_handler.py b/ts/torch_handler/base_handler.py index 23118710e9..96d2298675 100644 --- a/ts/torch_handler/base_handler.py +++ b/ts/torch_handler/base_handler.py @@ -62,11 +62,14 @@ if os.environ.get("TS_IPEX_ENABLE", "false") == "true": try: import intel_extension_for_pytorch as ipex - print("Succesfully imported ipex") + # print("Succesfully imported ipex TS_IPEX_GPU_ENABLE :", os.environ.get("TS_IPEX_GPU_ENABLE", "false")) IPEX_AVAILABLE = True + if torch.xpu.is_available() and os.environ.get("TS_IPEX_GPU_ENABLE", "false") == "true": if torch.xpu.is_available(): IPEX_GPU = True logger.info("Torch support for Intel GPU enabled") + else: + IPEX_GPU = False except ImportError as error: logger.warning( "IPEX is enabled but intel-extension-for-pytorch is not installed. Proceeding without IPEX." @@ -547,4 +550,4 @@ def get_device(self): Returns: string : self device """ - return self.device + return self.device \ No newline at end of file From ee159f115f8b681bc5c7e707afe741d05b175266 Mon Sep 17 00:00:00 2001 From: "Navulla, Vamshi Krishna" Date: Fri, 3 May 2024 00:08:52 +0000 Subject: [PATCH 03/23] Instructions for IPEX GPU support --- .../intel_extension_for_pytorch/README.md | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/examples/intel_extension_for_pytorch/README.md b/examples/intel_extension_for_pytorch/README.md index 718d08e8af..797ae556e3 100644 --- a/examples/intel_extension_for_pytorch/README.md +++ b/examples/intel_extension_for_pytorch/README.md @@ -9,6 +9,7 @@ Here we show how to use TorchServe with Intel® Extension for PyTorch*. * [Install Intel® Extension for PyTorch*](https://github.com/pytorch/serve/blob/master/examples/intel_extension_for_pytorch/README.md#install-intel-extension-for-pytorch) * [Serving model with Intel® Extension for PyTorch*](https://github.com/pytorch/serve/blob/master/examples/intel_extension_for_pytorch/README.md#serving-model-with-intel-extension-for-pytorch) * [TorchServe with Launcher](#torchserve-with-launcher) +* [TorchServe with Intel® Extension for PyTorch* and Intel GPUs](#torchserve-with-intel®-extension-for-pytorch-and-intel-gpus) * [Creating and Exporting INT8 model for Intel® Extension for PyTorch*](https://github.com/pytorch/serve/blob/master/examples/intel_extension_for_pytorch/README.md#creating-and-exporting-int8-model-for-intel-extension-for-pytorch) * [Benchmarking with Launcher](#benchmarking-with-launcher) * [Performance Boost with Intel® Extension for PyTorch* and Launcher](https://github.com/pytorch/serve/blob/master/examples/intel_extension_for_pytorch/README.md#performance-boost-with-intel-extension-for-pytorch-and-launcher) @@ -73,6 +74,7 @@ CPU usage is shown below. 
4 main worker threads were launched, each launching 14 ![26](https://user-images.githubusercontent.com/93151422/170373651-fd8a0363-febf-4528-bbae-e1ddef119358.gif) + #### Scaling workers Additionally when dynamically [scaling the number of workers](https://pytorch.org/serve/management_api.html#scale-workers), cores that were pinned to killed workers by the launcher could be left unutilized. To address this problem, launcher internally restarts the workers to re-distribute cores that were pinned to killed workers to the remaining, alive workers. This is taken care internally, so users do not have to worry about this. @@ -90,6 +92,29 @@ Add the following lines in `config.properties` to use launcher with its default cpu_launcher_enable=true ``` +## TorchServe with Intel® Extension for PyTorch* and Intel GPUs + +TorchServe can also leverage Intel GPU for acceleration, providing additional performance benefits. To use TorchServe with Intel GPU, the machine must have the latest oneAPI Base Kit installed, activated, and ipex GPU installed. + + +### Installation and Setup for Intel GPU Support +**Install Intel oneAPI Base Kit:** Follow the installation instructions for your operating system from the [Intel oneAPI Basekit Installation](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.htm). + +**Install the ipex GPU package to enable TorchServe to utilize Intel GPU for acceleration:** Follow the installation instructions for your operating system from the [ Intel® Extension for PyTorch* XPU/GPU Installation](https://intel.github.io/intel-extension-for-pytorch/index.html#installation?platform=gpu). + +**Activate the Intel oneAPI Base Kit:** Activate the Intel oneAPI Base Kit using the following command: + ```bash + source /path/to/oneapi/setvars.sh + ``` + +**Enable Intel GPU Support in TorchServe:** To enable TorchServe to use Intel GPUs, set the following configuration in `config.properties`: + ``` + ipex_enable=true + ipex_gpu_enable=true + ``` + + + ## Creating and Exporting INT8 model for Intel® Extension for PyTorch* Intel® Extension for PyTorch* supports both eager and torchscript mode. In this section, we show how to deploy INT8 model for Intel® Extension for PyTorch*. Refer to [here](https://github.com/intel/intel-extension-for-pytorch/blob/master/docs/tutorials/features/int8_overview.md) for more details on Intel® Extension for PyTorch* optimizations for quantization. 
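The Intel GPU setup documented in the patch above can be verified before starting TorchServe. A minimal sketch, assuming an Intel® Extension for PyTorch* build with XPU support is installed and the oneAPI environment has been sourced (the script name is illustrative, not part of this series):

```python
# check_xpu.py -- illustrative helper, not part of this patch series.
import torch

# Importing IPEX registers the "xpu" backend with PyTorch.
import intel_extension_for_pytorch as ipex  # noqa: F401

if not torch.xpu.is_available():
    raise SystemExit("No Intel GPU visible; check drivers and `source setvars.sh`.")
for i in range(torch.xpu.device_count()):
    print(f"xpu:{i} -> {torch.xpu.get_device_name(i)}")
```

If this prints at least one device, the `ipex_enable=true` and `ipex_gpu_enable=true` settings in `config.properties` should take effect as described above.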
From bbbb626b5befcead5837ee9efb08a63f61fe5327 Mon Sep 17 00:00:00 2001 From: "Navulla, Vamshi Krishna" Date: Fri, 3 May 2024 00:43:09 +0000 Subject: [PATCH 04/23] Final Commits 1 --- .../org/pytorch/serve/util/ConfigManager.java | 9 +- ts/metrics/system_metrics.py | 142 ++++-------------- ts/torch_handler/base_handler.py | 6 +- 3 files changed, 37 insertions(+), 120 deletions(-) diff --git a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java index 43d57de4bc..7444927c00 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java +++ b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java @@ -857,7 +857,6 @@ private static String getCanonicalPath(String path) { private static int getAvailableGpu() { try { - System.out.println("getAvailableGpu 1"); List gpuIds = new ArrayList<>(); String visibleCuda = System.getenv("CUDA_VISIBLE_DEVICES"); @@ -890,7 +889,6 @@ private static int getAvailableGpu() { throw new AssertionError("Unexpected response."); } else { - System.out.println("getAvailableGpu 2"); try { Process process = @@ -910,13 +908,12 @@ private static int getAvailableGpu() { }catch (IOException | InterruptedException e) { System.out.println("nvidia-smi not available or failed: " + e.getMessage()); } - System.out.println("getAvailableGpu 3"); + try { Process process = Runtime.getRuntime().exec("xpu-smi discovery --dump 1"); int ret = process.waitFor(); if (ret != 0) { return 0; } - System.out.println("Checking for Intel GPUs"); List list = IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); if (list.isEmpty() || !list.get(0).contains("Device ID")) { @@ -925,11 +922,13 @@ private static int getAvailableGpu() { for (int i = 1; i < list.size(); i++) { gpuIds.add(Integer.parseInt(list.get(i))); } + }catch (IOException | InterruptedException e) { + System.out.println("xpu-smi not available or failed: " + e.getMessage()); + } } - System.out.println("Number of GPUs found :"+ gpuIds.size()); return gpuIds.size(); } catch (IOException | InterruptedException e) { return 0; diff --git a/ts/metrics/system_metrics.py b/ts/metrics/system_metrics.py index 7b5633fd5a..d775edcf09 100644 --- a/ts/metrics/system_metrics.py +++ b/ts/metrics/system_metrics.py @@ -4,10 +4,9 @@ import logging import types from builtins import str -import time import psutil -import subprocess + from ts.metrics.dimension import Dimension from ts.metrics.metric import Metric @@ -62,119 +61,42 @@ def gpu_utilization(num_of_gpu): # pylint: disable=wrong-import-position # pylint: disable=import-outside-toplevel - # import nvgpu - # import pynvml - # from nvgpu import list_gpus + import nvgpu + import pynvml + from nvgpu import list_gpus # pylint: enable=wrong-import-position # pylint: enable=import-outside-toplevel - # info = nvgpu.gpu_info() - # for value in info: - # dimension_gpu = [ - # Dimension("Level", "Host"), - # Dimension("device_id", value["index"]), - # ] - # system_metrics.append( - # Metric( - # "GPUMemoryUtilization", - # value["mem_used_percent"], - # "percent", - # dimension_gpu, - # ) - # ) - # system_metrics.append( - # Metric("GPUMemoryUsed", value["mem_used"], "MB", dimension_gpu) - # ) - - # try: - # statuses = list_gpus.device_statuses() - # except pynvml.nvml.NVMLError_NotSupported: - # logging.error("gpu device monitoring not supported") - # statuses = [] - - # for idx, status in enumerate(statuses): - # dimension_gpu = [Dimension("Level", "Host"), 
Dimension("device_id", idx)] - # system_metrics.append( - # Metric("GPUUtilization", status["utilization"], "percent", dimension_gpu) - # ) - - logging.info(f"XPU Utillization: {num_of_gpu}") - start_time = time.time() - timeout = 1 - try: - # Run the xpu-smi command to get GPU metrics - process = subprocess.Popen( - ["xpu-smi", "dump", "-d", "0", "-m", "0,5"], - stdout=subprocess.PIPE, - text=True # Ensures that output is in text form - ) - output_lines = [] - while True: - current_time = time.time() - if current_time - start_time > timeout: - break - - # Try to read a line of output - lines = process.stdout.readline() - if not lines: - break - output_lines.append(lines.strip()) - - # You can process lines here or later - print(lines.strip()) # Example of processing output in real-time - - # Parse the output to extract GPU metrics - headers = output_lines[0].split(', ') - data_lines = output_lines[1:] - for line in data_lines: - values = line.split(', ') - if len(values) != len(headers): - logging.error(f"Data format error in line: {line}") - continue - - # Create a dictionary for easy access to each column - data_dict = dict(zip(headers, values)) - - # Extract necessary data - timestamp = data_dict["Timestamp"] - device_id = data_dict["DeviceId"] - gpu_utilization = data_dict["GPU Utilization (%)"] - memory_utilization = data_dict["GPU Memory Utilization (%)"] - - # Create dimensions - dimensions = [ - Dimension("Level", "Host"), - Dimension("DeviceId", device_id) - ] - - # Append GPU Utilization Metric - system_metrics.append( - Metric( - "GPUUtilization", - gpu_utilization, - "percent", - dimensions - ) - ) - - # Append GPU Memory Utilization Metric - system_metrics.append( - Metric( - "GPUMemoryUtilization", - memory_utilization, - "percent", - dimensions - ) + info = nvgpu.gpu_info() + for value in info: + dimension_gpu = [ + Dimension("Level", "Host"), + Dimension("device_id", value["index"]), + ] + system_metrics.append( + Metric( + "GPUMemoryUtilization", + value["mem_used_percent"], + "percent", + dimension_gpu, ) - # logging.info(f"Added metric: {system_metrics[-1]}") - # logging.info(f"Added metric: {system_metrics[-2]}") - + ) + system_metrics.append( + Metric("GPUMemoryUsed", value["mem_used"], "MB", dimension_gpu) + ) - except FileNotFoundError: - logging.error("xpu-smi command not found. 
Cannot collect Intel GPU metrics.") - except subprocess.CalledProcessError as e: - logging.error("Error running xpu-smi command: %s", e) + try: + statuses = list_gpus.device_statuses() + except pynvml.nvml.NVMLError_NotSupported: + logging.error("gpu device monitoring not supported") + statuses = [] + + for idx, status in enumerate(statuses): + dimension_gpu = [Dimension("Level", "Host"), Dimension("device_id", idx)] + system_metrics.append( + Metric("GPUUtilization", status["utilization"], "percent", dimension_gpu) + ) def collect_all(mod, num_of_gpu): @@ -201,4 +123,4 @@ def collect_all(mod, num_of_gpu): for met in system_metrics: logging.info(str(met)) - logging.info("") + logging.info("") \ No newline at end of file diff --git a/ts/torch_handler/base_handler.py b/ts/torch_handler/base_handler.py index 96d2298675..0904559933 100644 --- a/ts/torch_handler/base_handler.py +++ b/ts/torch_handler/base_handler.py @@ -62,12 +62,11 @@ if os.environ.get("TS_IPEX_ENABLE", "false") == "true": try: import intel_extension_for_pytorch as ipex - # print("Succesfully imported ipex TS_IPEX_GPU_ENABLE :", os.environ.get("TS_IPEX_GPU_ENABLE", "false")) IPEX_AVAILABLE = True if torch.xpu.is_available() and os.environ.get("TS_IPEX_GPU_ENABLE", "false") == "true": if torch.xpu.is_available(): IPEX_GPU = True - logger.info("Torch support for Intel GPU enabled") + logger.info("Torchserve support for Intel GPU enabled") else: IPEX_GPU = False except ImportError as error: @@ -257,7 +256,6 @@ def initialize(self, context): self.model = self.model.to(memory_format=torch.channels_last) if IPEX_GPU: self.model = self.model.to("xpu") # IPEX GPU - logger.info("Model loaded on GPU") self.model = ipex.optimize(self.model) logger.info(f"Compiled model with ipex") @@ -373,9 +371,7 @@ def inference(self, data, *args, **kwargs): """ with torch.inference_mode(): if IPEX_AVAILABLE and IPEX_GPU: # IPEX GPU - logger.info("GPU Enabled") marshalled_data = data.to("xpu") - # print(marshalled_data, "Data on Device") else: marshalled_data = data.to(self.device) results = self.model(marshalled_data, *args, **kwargs) From d7f0c8f00d7801812b92f50c8a89fc0503d7ef7e Mon Sep 17 00:00:00 2001 From: "Navulla, Vamshi Krishna" Date: Fri, 3 May 2024 00:51:15 +0000 Subject: [PATCH 05/23] Style: Refactor code formatting --- .../org/pytorch/serve/util/ConfigManager.java | 132 +++++++++--------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java index 7444927c00..a132a09a68 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java +++ b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java @@ -856,83 +856,83 @@ private static String getCanonicalPath(String path) { } private static int getAvailableGpu() { - try { + try { - List gpuIds = new ArrayList<>(); - String visibleCuda = System.getenv("CUDA_VISIBLE_DEVICES"); - if (visibleCuda != null && !visibleCuda.isEmpty()) { - String[] ids = visibleCuda.split(","); - for (String id : ids) { - gpuIds.add(Integer.parseInt(id)); - } - } else if (System.getProperty("os.name").startsWith("Mac")) { - Process process = Runtime.getRuntime().exec("system_profiler SPDisplaysDataType"); - int ret = process.waitFor(); - if (ret != 0) { + List gpuIds = new ArrayList<>(); + String visibleCuda = System.getenv("CUDA_VISIBLE_DEVICES"); + if (visibleCuda != null && !visibleCuda.isEmpty()) { + String[] ids = 
visibleCuda.split(","); + for (String id : ids) { + gpuIds.add(Integer.parseInt(id)); + } + } else if (System.getProperty("os.name").startsWith("Mac")) { + Process process = Runtime.getRuntime().exec("system_profiler SPDisplaysDataType"); + int ret = process.waitFor(); + if (ret != 0) { + return 0; + } + + BufferedReader reader = + new BufferedReader(new InputStreamReader(process.getInputStream())); + String line; + while ((line = reader.readLine()) != null) { + if (line.contains("Chipset Model:") && !line.contains("Apple M1")) { return 0; } - - BufferedReader reader = - new BufferedReader(new InputStreamReader(process.getInputStream())); - String line; - while ((line = reader.readLine()) != null) { - if (line.contains("Chipset Model:") && !line.contains("Apple M1")) { - return 0; - } - if (line.contains("Total Number of Cores:")) { - String[] parts = line.split(":"); - if (parts.length >= 2) { - return (Integer.parseInt(parts[1].trim())); - } + if (line.contains("Total Number of Cores:")) { + String[] parts = line.split(":"); + if (parts.length >= 2) { + return (Integer.parseInt(parts[1].trim())); } } - throw new AssertionError("Unexpected response."); - } else { + } + throw new AssertionError("Unexpected response."); + } else { - - try { - Process process = - Runtime.getRuntime().exec("nvidia-smi --query-gpu=index --format=csv"); - int ret = process.waitFor(); + + try { + Process process = + Runtime.getRuntime().exec("nvidia-smi --query-gpu=index --format=csv"); + int ret = process.waitFor(); + if (ret != 0) { + return 0; + } + List list = + IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); + if (list.isEmpty() || !"index".equals(list.get(0))) { + throw new AssertionError("Unexpected nvidia-smi response."); + } + for (int i = 1; i < list.size(); i++) { + gpuIds.add(Integer.parseInt(list.get(i))); + } + }catch (IOException | InterruptedException e) { + System.out.println("nvidia-smi not available or failed: " + e.getMessage()); + } + try { + Process process = Runtime.getRuntime().exec("xpu-smi discovery --dump 1"); + int ret = process.waitFor(); if (ret != 0) { return 0; } - List list = - IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); - if (list.isEmpty() || !"index".equals(list.get(0))) { - throw new AssertionError("Unexpected nvidia-smi response."); - } - for (int i = 1; i < list.size(); i++) { - gpuIds.add(Integer.parseInt(list.get(i))); - } - }catch (IOException | InterruptedException e) { - System.out.println("nvidia-smi not available or failed: " + e.getMessage()); - } - try { - Process process = Runtime.getRuntime().exec("xpu-smi discovery --dump 1"); - int ret = process.waitFor(); - if (ret != 0) { - return 0; - } - List list = - IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); - if (list.isEmpty() || !list.get(0).contains("Device ID")) { - throw new AssertionError("Unexpected xpu-smi response."); - } - for (int i = 1; i < list.size(); i++) { - gpuIds.add(Integer.parseInt(list.get(i))); - } - }catch (IOException | InterruptedException e) { - System.out.println("xpu-smi not available or failed: " + e.getMessage()); - } - - - + List list = + IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); + if (list.isEmpty() || !list.get(0).contains("Device ID")) { + throw new AssertionError("Unexpected xpu-smi response."); + } + for (int i = 1; i < list.size(); i++) { + gpuIds.add(Integer.parseInt(list.get(i))); } - return gpuIds.size(); - } catch (IOException | InterruptedException e) { - return 0; + }catch 
(IOException | InterruptedException e) { + System.out.println("xpu-smi not available or failed: " + e.getMessage()); + } + + + } + return gpuIds.size(); + } catch (IOException | InterruptedException e) { + return 0; + } } public List getAllowedUrls() { From 881572be3e273bf3101b2d6d3cf5e35c615788c2 Mon Sep 17 00:00:00 2001 From: "Navulla, Vamshi Krishna" Date: Fri, 3 May 2024 01:09:25 +0000 Subject: [PATCH 06/23] Readme Updated --- examples/intel_extension_for_pytorch/README.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/examples/intel_extension_for_pytorch/README.md b/examples/intel_extension_for_pytorch/README.md index 797ae556e3..a0222867d0 100644 --- a/examples/intel_extension_for_pytorch/README.md +++ b/examples/intel_extension_for_pytorch/README.md @@ -98,16 +98,23 @@ TorchServe can also leverage Intel GPU for acceleration, providing additional pe ### Installation and Setup for Intel GPU Support -**Install Intel oneAPI Base Kit:** Follow the installation instructions for your operating system from the [Intel oneAPI Basekit Installation](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.htm). +**Install Intel oneAPI Base Kit:** +Follow the installation instructions for your operating system from the [Intel oneAPI Basekit Installation](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.htm). -**Install the ipex GPU package to enable TorchServe to utilize Intel GPU for acceleration:** Follow the installation instructions for your operating system from the [ Intel® Extension for PyTorch* XPU/GPU Installation](https://intel.github.io/intel-extension-for-pytorch/index.html#installation?platform=gpu). +**Install the ipex GPU package to enable TorchServe to utilize Intel GPU for acceleration:** +Follow the installation instructions for your operating system from the [ Intel® Extension for PyTorch* XPU/GPU Installation](https://intel.github.io/intel-extension-for-pytorch/index.html#installation?platform=gpu). -**Activate the Intel oneAPI Base Kit:** Activate the Intel oneAPI Base Kit using the following command: +**Activate the Intel oneAPI Base Kit:** +Activate the Intel oneAPI Base Kit using the following command: ```bash source /path/to/oneapi/setvars.sh ``` -**Enable Intel GPU Support in TorchServe:** To enable TorchServe to use Intel GPUs, set the following configuration in `config.properties`: +**Install xpu-smi:** +Install xpu-smi to let torchserve detect the number of Intel GPU devices present. 
xpu-smi provides information about the Intel GPU, including temperature, utilization, and other metrics.[xpu-smi Installation Guide](https://dgpu-docs.intel.com/driver/installation.html#ubuntu-package-repository) + +**Enable Intel GPU Support in TorchServe:** +To enable TorchServe to use Intel GPUs, set the following configuration in `config.properties`: ``` ipex_enable=true ipex_gpu_enable=true From e5f3e6a0e2789b7ca745446d3bd069b2ec93f475 Mon Sep 17 00:00:00 2001 From: "Navulla, Vamshi Krishna" Date: Fri, 3 May 2024 01:29:39 +0000 Subject: [PATCH 07/23] Code Refactoring --- ts/metrics/system_metrics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ts/metrics/system_metrics.py b/ts/metrics/system_metrics.py index d775edcf09..64ee7880c0 100644 --- a/ts/metrics/system_metrics.py +++ b/ts/metrics/system_metrics.py @@ -122,5 +122,4 @@ def collect_all(mod, num_of_gpu): for met in system_metrics: logging.info(str(met)) - logging.info("") \ No newline at end of file From e91db6540dec354f3ee96aa984c84152d8e96bf7 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 3 May 2024 03:40:30 +0000 Subject: [PATCH 08/23] Code Refactoring --- ts/metrics/system_metrics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ts/metrics/system_metrics.py b/ts/metrics/system_metrics.py index 64ee7880c0..e0a21f1c4f 100644 --- a/ts/metrics/system_metrics.py +++ b/ts/metrics/system_metrics.py @@ -122,4 +122,5 @@ def collect_all(mod, num_of_gpu): for met in system_metrics: logging.info(str(met)) - logging.info("") \ No newline at end of file + + logging.info("") From 45f971f39fa3862c1fd5b414b74d8d3930d2a890 Mon Sep 17 00:00:00 2001 From: "Navulla, Vamshi Krishna" Date: Fri, 3 May 2024 19:37:18 +0000 Subject: [PATCH 09/23] Final Commit --- ts/torch_handler/base_handler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ts/torch_handler/base_handler.py b/ts/torch_handler/base_handler.py index 0904559933..98a6b14939 100644 --- a/ts/torch_handler/base_handler.py +++ b/ts/torch_handler/base_handler.py @@ -64,7 +64,6 @@ import intel_extension_for_pytorch as ipex IPEX_AVAILABLE = True if torch.xpu.is_available() and os.environ.get("TS_IPEX_GPU_ENABLE", "false") == "true": - if torch.xpu.is_available(): IPEX_GPU = True logger.info("Torchserve support for Intel GPU enabled") else: From ae0318427cbe7751b1151257f56a549b36d4f0d8 Mon Sep 17 00:00:00 2001 From: "Navulla, Vamshi Krishna" Date: Tue, 14 May 2024 02:20:05 +0000 Subject: [PATCH 10/23] self.device mapping to XPU --- ts/torch_handler/base_handler.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/ts/torch_handler/base_handler.py b/ts/torch_handler/base_handler.py index c735c2caf6..09ef065aff 100644 --- a/ts/torch_handler/base_handler.py +++ b/ts/torch_handler/base_handler.py @@ -71,11 +71,6 @@ try: import intel_extension_for_pytorch as ipex IPEX_AVAILABLE = True - if torch.xpu.is_available() and os.environ.get("TS_IPEX_GPU_ENABLE", "false") == "true": - IPEX_GPU = True - logger.info("Torchserve support for Intel GPU enabled") - else: - IPEX_GPU = False except ImportError as error: logger.warning( "IPEX is enabled but intel-extension-for-pytorch is not installed. Proceeding without IPEX." 
@@ -157,12 +152,17 @@ def initialize(self, context): self.model_yaml_config = context.model_yaml_config properties = context.system_properties - + print("gpuId BaseHandler" , properties.get("gpu_id")) if torch.cuda.is_available() and properties.get("gpu_id") is not None: self.map_location = "cuda" self.device = torch.device( self.map_location + ":" + str(properties.get("gpu_id")) ) + elif torch.xpu.is_available() and properties.get("gpu_id") is not None and os.environ.get("TS_IPEX_GPU_ENABLE", "false") == "true": + self.map_location = "xpu" + self.device = torch.device( + self.map_location + ":" + str(properties.get("gpu_id")) + ) elif torch.backends.mps.is_available() and properties.get("gpu_id") is not None: self.map_location = "mps" self.device = torch.device("mps") @@ -261,8 +261,7 @@ def initialize(self, context): elif IPEX_AVAILABLE: self.model = self.model.to(memory_format=torch.channels_last) - if IPEX_GPU: - self.model = self.model.to("xpu") # IPEX GPU + self.model = self.model.to(self.device) self.model = ipex.optimize(self.model) logger.info(f"Compiled model with ipex") @@ -377,10 +376,7 @@ def inference(self, data, *args, **kwargs): Torch Tensor : The Predicted Torch Tensor is returned in this function. """ with torch.inference_mode(): - if IPEX_AVAILABLE and IPEX_GPU: # IPEX GPU - marshalled_data = data.to("xpu") - else: - marshalled_data = data.to(self.device) + marshalled_data = data.to(self.device) results = self.model(marshalled_data, *args, **kwargs) return results From d64f3148f87e6d1290df7cbc0d03eedc2c2d5336 Mon Sep 17 00:00:00 2001 From: "Navulla, Vamshi Krishna" Date: Tue, 14 May 2024 04:07:54 +0000 Subject: [PATCH 11/23] Code Refactoring --- ts/torch_handler/base_handler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ts/torch_handler/base_handler.py b/ts/torch_handler/base_handler.py index 09ef065aff..2e3c480a76 100644 --- a/ts/torch_handler/base_handler.py +++ b/ts/torch_handler/base_handler.py @@ -152,7 +152,6 @@ def initialize(self, context): self.model_yaml_config = context.model_yaml_config properties = context.system_properties - print("gpuId BaseHandler" , properties.get("gpu_id")) if torch.cuda.is_available() and properties.get("gpu_id") is not None: self.map_location = "cuda" self.device = torch.device( From a4564da08745be339902ddd97772b6136445cd14 Mon Sep 17 00:00:00 2001 From: "Navulla, Vamshi Krishna" Date: Wed, 15 May 2024 15:45:35 +0000 Subject: [PATCH 12/23] Mulitple GPU device engagement enabled --- ts/torch_handler/base_handler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ts/torch_handler/base_handler.py b/ts/torch_handler/base_handler.py index 2e3c480a76..d5cd85d13f 100644 --- a/ts/torch_handler/base_handler.py +++ b/ts/torch_handler/base_handler.py @@ -150,7 +150,7 @@ def initialize(self, context): if context is not None and hasattr(context, "model_yaml_config"): self.model_yaml_config = context.model_yaml_config - + properties = context.system_properties if torch.cuda.is_available() and properties.get("gpu_id") is not None: self.map_location = "cuda" @@ -162,6 +162,7 @@ def initialize(self, context): self.device = torch.device( self.map_location + ":" + str(properties.get("gpu_id")) ) + torch.xpu.device(self.device) elif torch.backends.mps.is_available() and properties.get("gpu_id") is not None: self.map_location = "mps" self.device = torch.device("mps") From 9188deb4911633a1d2a3f7f97e80ff7343fbc492 Mon Sep 17 00:00:00 2001 From: Kanya-Mo <167922169+Kanya-Mo@users.noreply.github.com> Date: Tue, 28 May 2024 17:12:05 
-0700 Subject: [PATCH 13/23] Remove unused changes --- .../org/pytorch/serve/util/ConfigManager.java | 29 ++----------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java index b266020022..7a82bd0d63 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java +++ b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java @@ -943,10 +943,7 @@ private static int getAvailableGpu() { // No MPS devices detected return 0; } else { - - - try { - Process process = + Process process = Runtime.getRuntime().exec("nvidia-smi --query-gpu=index --format=csv"); int ret = process.waitFor(); if (ret != 0) { @@ -960,30 +957,8 @@ private static int getAvailableGpu() { for (int i = 1; i < list.size(); i++) { gpuIds.add(Integer.parseInt(list.get(i))); } - }catch (IOException | InterruptedException e) { - System.out.println("nvidia-smi not available or failed: " + e.getMessage()); - } - try { - Process process = Runtime.getRuntime().exec("xpu-smi discovery --dump 1"); - int ret = process.waitFor(); - if (ret != 0) { - return 0; - } - List list = - IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); - if (list.isEmpty() || !list.get(0).contains("Device ID")) { - throw new AssertionError("Unexpected xpu-smi response."); - } - for (int i = 1; i < list.size(); i++) { - gpuIds.add(Integer.parseInt(list.get(i))); - } - }catch (IOException | InterruptedException e) { - System.out.println("xpu-smi not available or failed: " + e.getMessage()); - } - - - } + return gpuIds.size(); } catch (IOException | InterruptedException e) { return 0; From e3be79c7aee1fb1eb9cf29433e4dfcc9f5dde131 Mon Sep 17 00:00:00 2001 From: "Mo, Kanya" Date: Wed, 5 Jun 2024 17:12:32 -0700 Subject: [PATCH 14/23] Revert "Remove unused changes" This reverts commit 9188deb4911633a1d2a3f7f97e80ff7343fbc492. 
--- .../org/pytorch/serve/util/ConfigManager.java | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java index 7a82bd0d63..b266020022 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java +++ b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java @@ -943,7 +943,10 @@ private static int getAvailableGpu() { // No MPS devices detected return 0; } else { - Process process = + + + try { + Process process = Runtime.getRuntime().exec("nvidia-smi --query-gpu=index --format=csv"); int ret = process.waitFor(); if (ret != 0) { @@ -957,8 +960,30 @@ private static int getAvailableGpu() { for (int i = 1; i < list.size(); i++) { gpuIds.add(Integer.parseInt(list.get(i))); } - } + }catch (IOException | InterruptedException e) { + System.out.println("nvidia-smi not available or failed: " + e.getMessage()); + } + try { + Process process = Runtime.getRuntime().exec("xpu-smi discovery --dump 1"); + int ret = process.waitFor(); + if (ret != 0) { + return 0; + } + List list = + IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); + if (list.isEmpty() || !list.get(0).contains("Device ID")) { + throw new AssertionError("Unexpected xpu-smi response."); + } + for (int i = 1; i < list.size(); i++) { + gpuIds.add(Integer.parseInt(list.get(i))); + } + }catch (IOException | InterruptedException e) { + System.out.println("xpu-smi not available or failed: " + e.getMessage()); + } + + + } return gpuIds.size(); } catch (IOException | InterruptedException e) { return 0; From 5fe76453373b7ccd0fefaf980ce2bab2e6f4e1ed Mon Sep 17 00:00:00 2001 From: Anup Renikunta Date: Thu, 13 Jun 2024 22:13:12 -0700 Subject: [PATCH 15/23] Add performance gain info for GPU --- .../intel_extension_for_pytorch/README.md | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/examples/intel_extension_for_pytorch/README.md b/examples/intel_extension_for_pytorch/README.md index a0222867d0..f80d7f3f85 100644 --- a/examples/intel_extension_for_pytorch/README.md +++ b/examples/intel_extension_for_pytorch/README.md @@ -10,6 +10,7 @@ Here we show how to use TorchServe with Intel® Extension for PyTorch*. 
 * [Serving model with Intel® Extension for PyTorch*](https://github.com/pytorch/serve/blob/master/examples/intel_extension_for_pytorch/README.md#serving-model-with-intel-extension-for-pytorch)
 * [TorchServe with Launcher](#torchserve-with-launcher)
 * [TorchServe with Intel® Extension for PyTorch* and Intel GPUs](#torchserve-with-intel®-extension-for-pytorch-and-intel-gpus)
+* [Performance Gain with Intel® Extension for PyTorch* and Intel GPU](https://github.com/pytorch/serve/blob/master/examples/intel_extension_for_pytorch/README.md#performance-gain-with-intel-extension-for-pytorch-and-intel-gpu)
 * [Creating and Exporting INT8 model for Intel® Extension for PyTorch*](https://github.com/pytorch/serve/blob/master/examples/intel_extension_for_pytorch/README.md#creating-and-exporting-int8-model-for-intel-extension-for-pytorch)
 * [Benchmarking with Launcher](#benchmarking-with-launcher)
 * [Performance Boost with Intel® Extension for PyTorch* and Launcher](https://github.com/pytorch/serve/blob/master/examples/intel_extension_for_pytorch/README.md#performance-boost-with-intel-extension-for-pytorch-and-launcher)
@@ -120,6 +121,61 @@ To enable TorchServe to use Intel GPUs, set the following configuration in `conf
 ipex_gpu_enable=true
 ```
+## Performance Gain with Intel® Extension for PyTorch* and Intel GPU
+
+To understand the performance gain from using an Intel GPU, the TorchServe-recommended [Apache Bench benchmark](https://github.com/pytorch/serve/tree/master/benchmarks#benchmarking-with-apache-bench) is executed on the FastRCNN FP32 model.
+
+A `model_config.json` file is created, and the following parameters are added:
+
+```
+{
+    "url": "https://torchserve.pytorch.org/mar_files/fastrcnn.mar",
+    "requests": "10000",
+    "concurrency": "100",
+    "workers": "1",
+    "batch_delay": "100",
+    "batch_size": "1",
+    "input": "../examples/image_classifier/kitten.jpg",
+    "backend_profiling": "FALSE",
+    "exec_env": "local"
+}
+```
+
+The batch size can be changed as required.
+
+The following lines are added to `config.properties` to utilize IPEX and an Intel GPU:
+
+```
+ipex_enable=true
+ipex_gpu_enable=true
+```
+
+To reproduce the test, use the following command:
+
+```
+python benchmark-ab.py --config model_config.json --config_properties config.properties
+```
+
+This test is performed on a server containing Intel(R) Core (TM) i5-9600K CPU + Intel(R) Arc(TM) A770 Graphics and is compared with an Intel(R) Xeon(R) Gold 6438Y CPU server.
+It is recommended to use only 1 worker per GPU, more than 1 worker per GPU is not validated and may cause performance degradation due to context switching.
+
+
+| Model | Batch size | CPU Throughput | GPU Throughput | CPU TS Latency mean | GPU TS Latency mean | Throughput speed up ratio | Latency speed up ratio |
+|:-----:|:----------:|:--------------:|:--------------:|:-------------------:|:-------------------:|:-------------------------:|:----------------------:|
+| FastRCNN_FP32 | 1 | 2.89 | 15.74 | 34636.68 | 6352.388 | 5.45 | 5.45 |
+| | 2 | 2.67 | 17.69 | 37520.781 | 5651.999 | 6.63 | 6.64 |
+| | 4 | 2.39 | 18.57 | 41886.902 | 5385.389 | 7.77 | 7.78 |
+| | 8 | 2.32 | 18.68 | 43146.797 | 5354.58 | 8.05 | 8.06 |
+| | 16 | 2.39 | 19.26 | 41903.752 | 5193.307 | 8.06 | 8.07 |
+| | 32 | 2.49 | 19.06 | 40172.39 | 5245.912 | 7.65 | 7.66 |
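As a quick sanity check, the speedup columns can be re-derived from the throughput and latency columns (values transcribed from the first and last rows of the table above):

```python
# Re-derive the speedup ratios for batch sizes 1 and 32 from the table above.
rows = {
    1: (2.89, 15.74, 34636.68, 6352.388),  # cpu_tp, gpu_tp, cpu_lat, gpu_lat
    32: (2.49, 19.06, 40172.39, 5245.912),
}
for bs, (cpu_tp, gpu_tp, cpu_lat, gpu_lat) in rows.items():
    print(f"batch {bs}: throughput x{gpu_tp / cpu_tp:.2f}, latency x{cpu_lat / gpu_lat:.2f}")
# batch 1: throughput x5.45, latency x5.45
# batch 32: throughput x7.65, latency x7.66
```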
+
+*(Graph: throughput and latency speedup ratio vs. batch size.)*
+
+Above graph shows the speedup ratio of throughput and latency while using Intel GPU. The speedup ratio is increasing steadily reaching almost 8x till batch size 8 and gives diminishing returns after. Further increasing the batch size to 64 results in `RuntimeError: Native API failed. Native API returns: -5 (PI_ERROR_OUT_OF_RESOURCES)` error as GPU is overloaded. + +Note: The optimal configuration will vary depending on the hardware used. +``` ## Creating and Exporting INT8 model for Intel® Extension for PyTorch* From e029268040cc25596fb1512ac3362cc2658f4b00 Mon Sep 17 00:00:00 2001 From: Anup Renikunta Date: Thu, 13 Jun 2024 22:17:03 -0700 Subject: [PATCH 16/23] Update README.md --- examples/intel_extension_for_pytorch/README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/intel_extension_for_pytorch/README.md b/examples/intel_extension_for_pytorch/README.md index f80d7f3f85..32f9f24adc 100644 --- a/examples/intel_extension_for_pytorch/README.md +++ b/examples/intel_extension_for_pytorch/README.md @@ -175,8 +175,6 @@ It is recommended to use only 1 worker per GPU, more than 1 worker per GPU is no Above graph shows the speedup ratio of throughput and latency while using Intel GPU. The speedup ratio is increasing steadily reaching almost 8x till batch size 8 and gives diminishing returns after. Further increasing the batch size to 64 results in `RuntimeError: Native API failed. Native API returns: -5 (PI_ERROR_OUT_OF_RESOURCES)` error as GPU is overloaded. Note: The optimal configuration will vary depending on the hardware used. -``` - ## Creating and Exporting INT8 model for Intel® Extension for PyTorch* Intel® Extension for PyTorch* supports both eager and torchscript mode. In this section, we show how to deploy INT8 model for Intel® Extension for PyTorch*. Refer to [here](https://github.com/intel/intel-extension-for-pytorch/blob/master/docs/tutorials/features/int8_overview.md) for more details on Intel® Extension for PyTorch* optimizations for quantization. From a8bdb478b965640b19a4d7c1052b1b15800bdefc Mon Sep 17 00:00:00 2001 From: Anup Renikunta Date: Thu, 13 Jun 2024 22:23:59 -0700 Subject: [PATCH 17/23] Add units to table --- examples/intel_extension_for_pytorch/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/intel_extension_for_pytorch/README.md b/examples/intel_extension_for_pytorch/README.md index 32f9f24adc..bbfd0fc637 100644 --- a/examples/intel_extension_for_pytorch/README.md +++ b/examples/intel_extension_for_pytorch/README.md @@ -160,7 +160,7 @@ This test is performed on a server containing Intel(R) Core (TM) i5-9600K CPU + It is recommended to use only 1 worker per GPU, more than 1 worker per GPU is not validated and may cause performance degradation due to context switching. 
-| Model | Batch size | CPU Throughput | GPU Throughput | CPU TS Latency mean | GPU TS Latency mean | Throughput speed up ratio | Latency speed up ratio | +| Model | Batch size | CPU Throughput(img/sec) | GPU Throughput(img/sec) | CPU TS Latency mean(ms) | GPU TS Latency mean(ms) | Throughput speedup ratio | Latency speedup ratio | |:-----:|:----------:|:--------------:|:--------------:|:-------------------:|:-------------------:|:-------------------------:|:----------------------:| | FastRCNN_FP32 | 1 | 15.74 | 2.89 | 6352.388 | 34636.68 | 5.45 | 5.45 | | | 2 | 17.69 | 2.67 | 5651.999 | 37520.781 | 6.63 | 6.64 | From ee5187d56f169de9aa52f693c1c567f9061b52f1 Mon Sep 17 00:00:00 2001 From: "Mo, Kanya" Date: Tue, 18 Jun 2024 20:24:40 -0700 Subject: [PATCH 18/23] Update metric reading configuration. --- examples/intel_extension_for_pytorch/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/intel_extension_for_pytorch/README.md b/examples/intel_extension_for_pytorch/README.md index bbfd0fc637..3096d421ff 100644 --- a/examples/intel_extension_for_pytorch/README.md +++ b/examples/intel_extension_for_pytorch/README.md @@ -120,6 +120,10 @@ To enable TorchServe to use Intel GPUs, set the following configuration in `conf ipex_enable=true ipex_gpu_enable=true ``` +To enable metric reading, additionally set: + ``` + system_metrics_cmd=ts/metrics/intel_gpu_metric_collector.py --gpu ${Number of GPUs} + ``` ## Performance Gain with Intel® Extension for PyTorch* and Intel GPU From 578dfab511e5dd0a95fba2bc5e4e7604ea9ee126 Mon Sep 17 00:00:00 2001 From: "Mo, Kanya" Date: Thu, 20 Jun 2024 10:56:35 -0700 Subject: [PATCH 19/23] Update system metrics script path. --- examples/intel_extension_for_pytorch/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/intel_extension_for_pytorch/README.md b/examples/intel_extension_for_pytorch/README.md index 3096d421ff..91a38302e6 100644 --- a/examples/intel_extension_for_pytorch/README.md +++ b/examples/intel_extension_for_pytorch/README.md @@ -122,7 +122,7 @@ To enable TorchServe to use Intel GPUs, set the following configuration in `conf ``` To enable metric reading, additionally set: ``` - system_metrics_cmd=ts/metrics/intel_gpu_metric_collector.py --gpu ${Number of GPUs} + system_metrics_cmd=${PATH to examples/intel_extension_for_pytorch/intel_gpu_metric_collector.py} --gpu ${Number of GPUs} ``` ## Performance Gain with Intel® Extension for PyTorch* and Intel GPU From e48f2c10b5a9022972b2484d23430a4a3b2db2d1 Mon Sep 17 00:00:00 2001 From: Kanya-Mo <167922169+Kanya-Mo@users.noreply.github.com> Date: Fri, 21 Jun 2024 15:06:57 -0700 Subject: [PATCH 20/23] Update ConfigManager.java --- .../src/main/java/org/pytorch/serve/util/ConfigManager.java | 1 - 1 file changed, 1 deletion(-) diff --git a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java index 34540f4d52..1f772dd25f 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java +++ b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java @@ -496,7 +496,6 @@ public int getJobQueueSize() { } public int getNumberOfGpu() { - // return 1; return getIntProperty(TS_NUMBER_OF_GPU, 0); } From bca4012056a74179be4adfaf83593abb8c56a22c Mon Sep 17 00:00:00 2001 From: "Mo, Kanya" Date: Fri, 21 Jun 2024 17:40:23 -0700 Subject: [PATCH 21/23] Reformat ConfigManager.java --- .../org/pytorch/serve/util/ConfigManager.java | 67 +++++++++---------- 1 
file changed, 31 insertions(+), 36 deletions(-) diff --git a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java index 1f772dd25f..cbf9daa74a 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java +++ b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java @@ -162,7 +162,7 @@ public final class ConfigManager { private static Pattern pattern = Pattern.compile("\\$\\$([^$]+[^$])\\$\\$"); private Pattern blacklistPattern; - private Properties prop; + private Properties prop; private boolean snapshotDisabled; @@ -273,7 +273,6 @@ private ConfigManager(Arguments args) throws IOException { getAvailableGpu(), getIntProperty(TS_NUMBER_OF_GPU, Integer.MAX_VALUE)))); - String pythonExecutable = args.getPythonExecutable(); if (pythonExecutable != null) { prop.setProperty(PYTHON_EXECUTABLE, pythonExecutable); @@ -497,7 +496,7 @@ public int getJobQueueSize() { public int getNumberOfGpu() { return getIntProperty(TS_NUMBER_OF_GPU, 0); - } + } public boolean getModelControlMode() { return Boolean.parseBoolean(getProperty(MODEL_CONTROL_MODE, "false")); @@ -653,7 +652,7 @@ public String getCertificateFile() { public String getSystemMetricsCmd() { return prop.getProperty(SYSTEM_METRICS_CMD, ""); } - + public SslContext getSslContext() throws IOException, GeneralSecurityException { List supportedCiphers = Arrays.asList( @@ -962,51 +961,47 @@ private static int getAvailableGpu() { return 0; } else { - try { Process process = - Runtime.getRuntime().exec("nvidia-smi --query-gpu=index --format=csv"); - int ret = process.waitFor(); - if (ret != 0) { - return 0; - } - List list = - IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); - if (list.isEmpty() || !"index".equals(list.get(0))) { - throw new AssertionError("Unexpected nvidia-smi response."); - } - for (int i = 1; i < list.size(); i++) { - gpuIds.add(Integer.parseInt(list.get(i))); - } - }catch (IOException | InterruptedException e) { - System.out.println("nvidia-smi not available or failed: " + e.getMessage()); + Runtime.getRuntime().exec("nvidia-smi --query-gpu=index --format=csv"); + int ret = process.waitFor(); + if (ret != 0) { + return 0; + } + List list = + IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); + if (list.isEmpty() || !"index".equals(list.get(0))) { + throw new AssertionError("Unexpected nvidia-smi response."); + } + for (int i = 1; i < list.size(); i++) { + gpuIds.add(Integer.parseInt(list.get(i))); + } + } catch (IOException | InterruptedException e) { + System.out.println("nvidia-smi not available or failed: " + e.getMessage()); } try { - Process process = Runtime.getRuntime().exec("xpu-smi discovery --dump 1"); - int ret = process.waitFor(); + Process process = Runtime.getRuntime().exec("xpu-smi discovery --dump 1"); + int ret = process.waitFor(); if (ret != 0) { return 0; } - List list = - IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); - if (list.isEmpty() || !list.get(0).contains("Device ID")) { - throw new AssertionError("Unexpected xpu-smi response."); - } - for (int i = 1; i < list.size(); i++) { - gpuIds.add(Integer.parseInt(list.get(i))); - } - }catch (IOException | InterruptedException e) { - System.out.println("xpu-smi not available or failed: " + e.getMessage()); + List list = + IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); + if (list.isEmpty() || !list.get(0).contains("Device ID")) { + throw new 
AssertionError("Unexpected xpu-smi response."); + } + for (int i = 1; i < list.size(); i++) { + gpuIds.add(Integer.parseInt(list.get(i))); + } + } catch (IOException | InterruptedException e) { + System.out.println("xpu-smi not available or failed: " + e.getMessage()); } - - - } return gpuIds.size(); } catch (IOException | InterruptedException e) { return 0; } - } + } public List getAllowedUrls() { String allowedURL = prop.getProperty(TS_ALLOWED_URLS, DEFAULT_TS_ALLOWED_URLS); From 86f7ced25ea4d8c90f960055fccd6a536f9ef637 Mon Sep 17 00:00:00 2001 From: "Mo, Kanya" Date: Mon, 24 Jun 2024 11:21:45 -0700 Subject: [PATCH 22/23] Fix spelling issues --- examples/intel_extension_for_pytorch/README.md | 2 +- ts_scripts/spellcheck_conf/wordlist.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/intel_extension_for_pytorch/README.md b/examples/intel_extension_for_pytorch/README.md index 91a38302e6..46310e1cb3 100644 --- a/examples/intel_extension_for_pytorch/README.md +++ b/examples/intel_extension_for_pytorch/README.md @@ -100,7 +100,7 @@ TorchServe can also leverage Intel GPU for acceleration, providing additional pe ### Installation and Setup for Intel GPU Support **Install Intel oneAPI Base Kit:** -Follow the installation instructions for your operating system from the [Intel oneAPI Basekit Installation](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.htm). +Follow the installation instructions for your operating system from the [Intel oneAPI Base kit Installation](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.htm). **Install the ipex GPU package to enable TorchServe to utilize Intel GPU for acceleration:** Follow the installation instructions for your operating system from the [ Intel® Extension for PyTorch* XPU/GPU Installation](https://intel.github.io/intel-extension-for-pytorch/index.html#installation?platform=gpu). diff --git a/ts_scripts/spellcheck_conf/wordlist.txt b/ts_scripts/spellcheck_conf/wordlist.txt index 5f5c727749..1f077cf66a 100644 --- a/ts_scripts/spellcheck_conf/wordlist.txt +++ b/ts_scripts/spellcheck_conf/wordlist.txt @@ -16,6 +16,7 @@ torchvision ul usecase CUDA +XPU JDK NVIDIA WSL From 927483da3af65bfed31b048f80f76c3bb4e89a95 Mon Sep 17 00:00:00 2001 From: "Mo, Kanya" Date: Mon, 24 Jun 2024 12:04:24 -0700 Subject: [PATCH 23/23] Fix lint changed file. 
--- ts/torch_handler/base_handler.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/ts/torch_handler/base_handler.py b/ts/torch_handler/base_handler.py index 8f2579fb45..29af1224e4 100644 --- a/ts/torch_handler/base_handler.py +++ b/ts/torch_handler/base_handler.py @@ -20,7 +20,6 @@ load_label_mapping, ) - if packaging.version.parse(torch.__version__) >= packaging.version.parse("1.8.1"): from torch.profiler import ProfilerActivity, profile, record_function @@ -70,6 +69,7 @@ if os.environ.get("TS_IPEX_ENABLE", "false") == "true": try: import intel_extension_for_pytorch as ipex + IPEX_AVAILABLE = True except ImportError as error: logger.warning( @@ -79,7 +79,7 @@ else: IPEX_AVAILABLE = False - + try: import onnxruntime as ort import psutil @@ -147,22 +147,26 @@ def initialize(self, context): RuntimeError: Raises the Runtime error when the model.py is missing """ - + if context is not None and hasattr(context, "model_yaml_config"): self.model_yaml_config = context.model_yaml_config - + properties = context.system_properties if torch.cuda.is_available() and properties.get("gpu_id") is not None: self.map_location = "cuda" self.device = torch.device( self.map_location + ":" + str(properties.get("gpu_id")) ) - elif torch.xpu.is_available() and properties.get("gpu_id") is not None and os.environ.get("TS_IPEX_GPU_ENABLE", "false") == "true": + elif ( + torch.xpu.is_available() + and properties.get("gpu_id") is not None + and os.environ.get("TS_IPEX_GPU_ENABLE", "false") == "true" + ): self.map_location = "xpu" self.device = torch.device( self.map_location + ":" + str(properties.get("gpu_id")) ) - torch.xpu.device(self.device) + torch.xpu.device(self.device) elif torch.backends.mps.is_available() and properties.get("gpu_id") is not None: self.map_location = "mps" self.device = torch.device("mps") @@ -566,4 +570,4 @@ def get_device(self): Returns: string : self device """ - return self.device \ No newline at end of file + return self.device
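Taken together, the series leaves `base_handler.py` selecting devices in the order CUDA → Intel XPU → MPS → CPU. A condensed, standalone sketch of that final logic (the `hasattr` guard is an added safeguard for CPU-only PyTorch builds; everything else mirrors the final diff, with `gpu_id` coming from TorchServe's `context.system_properties`):

```python
import os

import torch


def select_device(gpu_id):
    """Device precedence as of PATCH 23/23: cuda, then xpu, then mps, then cpu."""
    if torch.cuda.is_available() and gpu_id is not None:
        return torch.device(f"cuda:{gpu_id}")
    if (
        hasattr(torch, "xpu")  # safeguard: torch.xpu exists only with an IPEX XPU build
        and torch.xpu.is_available()
        and gpu_id is not None
        and os.environ.get("TS_IPEX_GPU_ENABLE", "false") == "true"
    ):
        device = torch.device(f"xpu:{gpu_id}")
        # As in the final patch; torch.xpu.set_device(device) may be what is intended here.
        torch.xpu.device(device)
        return device
    if torch.backends.mps.is_available() and gpu_id is not None:
        return torch.device("mps")
    return torch.device("cpu")
```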