From aeaa1d650f2dbc5e8b88570cb7fd5bb163680e4d Mon Sep 17 00:00:00 2001 From: Mike Guo Date: Fri, 30 Jun 2023 20:19:51 +0800 Subject: [PATCH] make optimized_model_path be in temp folder instead of source model folder for transformer optimization (#16531) ### optimize_model generates a temporary model in the current model folder. Most of the time this is fine. However, it breaks when the function runs against an input model mounted from AzureML: in that case, the mounted folder is read-only. We have to copy the model to another temp folder before calling optimize_model to work around this issue; otherwise, optimize_model fails when creating the optimized model in the read-only folder. However, the model copy is painful, especially when the model is huge. This PR exposes optimized_model_path at the optimize_model level so that the caller can decide where to save the temp model. --- onnxruntime/python/tools/transformers/optimizer.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/onnxruntime/python/tools/transformers/optimizer.py b/onnxruntime/python/tools/transformers/optimizer.py index d870c447b86f..e1bff135db8d 100644 --- a/onnxruntime/python/tools/transformers/optimizer.py +++ b/onnxruntime/python/tools/transformers/optimizer.py @@ -20,6 +20,7 @@ import argparse import logging import os +import tempfile from typing import Dict, Optional import coloredlogs @@ -252,6 +253,9 @@ def optimize_model( # stable. disabled_optimizers = ["ConstantSharing"] temp_model_path = None + temp_dir = tempfile.TemporaryDirectory() + optimized_model_name = "model_o{}_{}.onnx".format(opt_level, "gpu" if use_gpu else "cpu") + optimized_model_path = os.path.join(temp_dir.name, optimized_model_name) if opt_level > 1: # Disable some optimizers that might cause failure in symbolic shape inference or attention fusion. 
disabled_optimizers += ( @@ -271,6 +275,7 @@ def optimize_model( opt_level=opt_level, disabled_optimizers=disabled_optimizers, verbose=verbose, + optimized_model_path=optimized_model_path, ) elif opt_level == 1: # basic optimizations (like constant folding and cast elimination) are not specified to execution provider. @@ -281,6 +286,7 @@ def optimize_model( opt_level=1, disabled_optimizers=disabled_optimizers, verbose=verbose, + optimized_model_path=optimized_model_path, ) if only_onnxruntime and not temp_model_path: @@ -293,10 +299,8 @@ def optimize_model( else: optimizer = optimize_by_fusion(model, model_type, num_heads, hidden_size, optimization_options) - # Remove the temporary model. - if temp_model_path: - os.remove(temp_model_path) - logger.debug(f"Remove temporary model: {temp_model_path}") + # remove the temporary directory + temp_dir.cleanup() return optimizer