IBM · elronbandel · Sep 24, 2024 · Sep 24, 2024 · Sep 24, 2024
diff --git a/src/unitxt/inference.py b/src/unitxt/inference.py
@@ -559,6 +559,7 @@ class WMLInferenceEngine(
         parameters (WMLInferenceEngineParams, optional): Instance of WMLInferenceEngineParams
             which defines inference parameters and their values. Deprecated attribute, please
             pass respective parameters directly to the WMLInferenceEngine class instead.
+        concurrency_limit (int): Number of requests sent in parallel. Defaults to 10, the maximum allowed.
 
     Examples:
         from .api import load_dataset
@@ -592,7 +593,7 @@ class WMLInferenceEngine(
     }
     data_classification_policy = ["public", "proprietary"]
     parameters: Optional[WMLInferenceEngineParams] = None
-
+    concurrency_limit: int = 10
     _client: Any = InternalField(default=None, name="WML client")
 
     def verify(self):
@@ -666,6 +667,7 @@ def _infer(self, dataset):
         return model.generate_text(
             prompt=dataset["source"],
             params=self.to_dict([WMLInferenceEngineParamsMixin], keep_empty=False),
+            concurrency_limit=self.concurrency_limit,
         )