From f1ed46133a1d6c5f04a8e754cf97d358be249b5a Mon Sep 17 00:00:00 2001
From: Elron Bandel
Date: Thu, 5 Sep 2024 18:29:00 +0300
Subject: [PATCH 1/3] Update README.md (#1198)

---
 README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 141eedf61..43050f960 100644
--- a/README.md
+++ b/README.md
@@ -31,11 +31,11 @@ https://github.com/IBM/unitxt/assets/23455264/baef9131-39d4-4164-90b2-05da52919f
 
 ### 🦄 Currently on Unitxt Catalog
 
-![NLP Tasks](https://img.shields.io/badge/NLP_tasks-40-blue)
-![Dataset Cards](https://img.shields.io/badge/Dataset_Cards-457-blue)
-![Templates](https://img.shields.io/badge/Templates-229-blue)
-![Formats](https://img.shields.io/badge/Formats-18-blue)
-![Metrics](https://img.shields.io/badge/Metrics-98-blue)
+![NLP Tasks](https://img.shields.io/badge/NLP_tasks-48-blue)
+![Dataset Cards](https://img.shields.io/badge/Dataset_Cards-537-blue)
+![Templates](https://img.shields.io/badge/Templates-265-blue)
+![Formats](https://img.shields.io/badge/Formats-23-blue)
+![Metrics](https://img.shields.io/badge/Metrics-136-blue)
 
 ### 🦄 Run Unitxt Exploration Dashboard
 

From 7e3caf509348b970f7c3acb09105adce83b63324 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Charchut?= <92163133+MikolajCharchut@users.noreply.github.com>
Date: Sun, 8 Sep 2024 10:05:07 +0200
Subject: [PATCH 2/3] add decorator with init warning (#1200)

* add decorator with init warning

* Empty-Commit for DCO

Signed-off-by: Mikolaj Charchut

* Empty-Commit for DCO

Signed-off-by: Mikolaj Charchut

* Empty-Commit for DCO

Signed-off-by: Mikolaj Charchut

* Empty-Commit for DCO

Signed-off-by: Mikolaj Charchut

* add message param and UnitxtWarning to decorator

Signed-off-by: Mikolaj Charchut

---------

Signed-off-by: Mikolaj Charchut
Signed-off-by: Mikolaj Charchut
Co-authored-by: Mikolaj Charchut
---
 src/unitxt/deprecation_utils.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/unitxt/deprecation_utils.py b/src/unitxt/deprecation_utils.py
index 2cfce92f4..23b2eb832 100644
--- a/src/unitxt/deprecation_utils.py
+++ b/src/unitxt/deprecation_utils.py
@@ -1,6 +1,7 @@
 import functools
 import warnings
 
+from .error_utils import UnitxtWarning
 from .settings_utils import get_constants, get_settings
 
 constants = get_constants()
@@ -98,3 +99,12 @@ def decorator(obj):
         return depraction_wrapper(func, version, alt_text)
 
     return decorator
+
+
+def init_warning(msg=""):
+    # Decorator that raises warning when class is initialized
+    def decorator(initiated_class):
+        UnitxtWarning(msg)
+        return initiated_class
+
+    return decorator

From ba2f04a679defc3097f16bc6b83666d5d497ff4d Mon Sep 17 00:00:00 2001
From: Elron Bandel
Date: Sun, 8 Sep 2024 12:35:25 +0300
Subject: [PATCH 3/3] Add mock inference mode setting and allow testing without gen ai key (#1204)

* Add mock inference mode setting

Signed-off-by: elronbandel

* Update

Signed-off-by: elronbandel

* remove gen ai key

Signed-off-by: elronbandel

---------

Signed-off-by: elronbandel
---
 .github/workflows/catalog_consistency.yml |  1 -
 .github/workflows/catalog_preparation.yml |  1 -
 src/unitxt/inference.py                   | 31 ++++++++++++++++++-----
 src/unitxt/settings_utils.py              |  1 +
 tests/utils.py                            |  2 +-
 5 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/catalog_consistency.yml b/.github/workflows/catalog_consistency.yml
index 4ea4005e1..20de8d87f 100644
--- a/.github/workflows/catalog_consistency.yml
+++ b/.github/workflows/catalog_consistency.yml
@@ -12,7 +12,6 @@ jobs:
     runs-on: ubuntu-latest
     env:
       OS: ubuntu-latest
-      GENAI_KEY: ${{ secrets.GENAI_KEY }}
       UNITXT_DEFAULT_VERBOSITY: error
       DATASETS_VERBOSITY: error
       HF_HUB_VERBOSITY: error
diff --git a/.github/workflows/catalog_preparation.yml b/.github/workflows/catalog_preparation.yml
index a3024f409..468513f30 100644
--- a/.github/workflows/catalog_preparation.yml
+++ b/.github/workflows/catalog_preparation.yml
@@ -12,7 +12,6 @@ jobs:
     runs-on: ubuntu-latest
     env:
       OS: ubuntu-latest
-      GENAI_KEY: ${{ secrets.GENAI_KEY }}
       UNITXT_DEFAULT_VERBOSITY: error
       DATASETS_VERBOSITY: error
       HF_HUB_VERBOSITY: error
diff --git a/src/unitxt/inference.py b/src/unitxt/inference.py
index c6b6b26e0..c35bda5fe 100644
--- a/src/unitxt/inference.py
+++ b/src/unitxt/inference.py
@@ -11,6 +11,9 @@
 from .image_operators import extract_images
 from .logging_utils import get_logger
 from .operator import PackageRequirementsMixin
+from .settings_utils import get_settings
+
+settings = get_settings()
 
 
 class InferenceEngine(abc.ABC, Artifact):
@@ -21,9 +24,20 @@ def _infer(self, dataset):
         """Perform inference on the input dataset."""
         pass
 
+    @abc.abstractmethod
+    def prepare_engine(self):
+        """Perform inference on the input dataset."""
+        pass
+
+    def prepare(self):
+        if not settings.mock_inference_mode:
+            self.prepare_engine()
+
     def infer(self, dataset) -> str:
         """Verifies instances of a dataset and performs inference."""
         [self.verify_instance(instance) for instance in dataset]
+        if settings.mock_inference_mode:
+            return [instance["source"] for instance in dataset]
         return self._infer(dataset)
 
     @deprecation(version="2.0.0")
@@ -122,7 +136,7 @@ def _prepare_pipeline(self):
             model=self.model_name, trust_remote_code=True, **model_args
         )
 
-    def prepare(self):
+    def prepare_engine(self):
         if not self.lazy_load:
             self._prepare_pipeline()
 
@@ -144,13 +158,17 @@ def _infer(self, dataset):
 class MockInferenceEngine(InferenceEngine):
     model_name: str
 
-    def prepare(self):
+    def prepare_engine(self):
         return
 
     def _infer(self, dataset):
         return ["[[10]]" for instance in dataset]
 
 
+class MockModeMixin(Artifact):
+    mock_mode: bool = False
+
+
 class IbmGenAiInferenceEngineParamsMixin(Artifact):
     beam_width: Optional[int] = None
     decoding_method: Optional[Literal["greedy", "sample"]] = None
@@ -201,11 +219,12 @@ class IbmGenAiInferenceEngine(
     data_classification_policy = ["public", "proprietary"]
     parameters: Optional[IbmGenAiInferenceEngineParams] = None
 
-    def prepare(self):
+    def prepare_engine(self):
         from genai import Client, Credentials
 
         api_key_env_var_name = "GENAI_KEY"
         api_key = os.environ.get(api_key_env_var_name)
+
         assert api_key is not None, (
             f"Error while trying to run IbmGenAiInferenceEngine."
             f" Please set the environment param '{api_key_env_var_name}'."
@@ -279,7 +298,7 @@ class OpenAiInferenceEngine(
     data_classification_policy = ["public"]
     parameters: Optional[OpenAiInferenceEngineParams] = None
 
-    def prepare(self):
+    def prepare_engine(self):
         from openai import OpenAI
 
         api_key_env_var_name = "OPENAI_API_KEY"
@@ -490,7 +509,7 @@ def _initialize_wml_client(self):
         client.set.default_project(self.credentials["project_id"])
         return client
 
-    def prepare(self):
+    def prepare_engine(self):
         self._client = self._initialize_wml_client()
 
         self._set_inference_parameters()
@@ -541,7 +560,7 @@ def _prepare_engine(self):
 
         self.processor = AutoProcessor.from_pretrained(self.model_name)
 
-    def prepare(self):
+    def prepare_engine(self):
         if not self.lazy_load:
             self._prepare_engine()
 
diff --git a/src/unitxt/settings_utils.py b/src/unitxt/settings_utils.py
index c6bbd8eac..9018a806c 100644
--- a/src/unitxt/settings_utils.py
+++ b/src/unitxt/settings_utils.py
@@ -146,6 +146,7 @@ def __getattr__(self, key):
     settings.seed = (int, 42)
     settings.skip_artifacts_prepare_and_verify = (bool, False)
     settings.data_classification_policy = None
+    settings.mock_inference_mode = (bool, False)
 
 if Constants.is_uninitilized():
     constants = Constants()
diff --git a/tests/utils.py b/tests/utils.py
index 1c0c1a9c6..36b76fa73 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -33,7 +33,7 @@ def setUpClass(cls):
         enable_explicit_format()
         unitxt.settings.allow_unverified_code = True
         unitxt.settings.use_only_local_catalogs = True
-        # unitxt.settings.global_loader_limit = 300
+        unitxt.settings.mock_inference_mode = True
         unitxt.settings.max_log_message_size = 1000000000000
         if settings.default_verbosity in ["error", "critical"]:
             if not sys.warnoptions: