Add docs

Signed-off-by: Elron Bandel <elron.bandel@ibm.com>
IBM · Mar 19, 2024 · 230503a · 230503a
1 parent b8df44c
commit 230503a
Show file tree

Hide file tree

Showing 2 changed files with 62 additions and 2 deletions.
diff --git a/src/unitxt/type_utils.py b/src/unitxt/type_utils.py
@@ -3,6 +3,46 @@
 import itertools
 import typing
 
+from .utils import safe_eval
+
+
+def parse_type_string(type_string: str) -> typing.Any:
+    """Parses a string representing a Python type hint and evaluates it to return the corresponding type object.
+
+    This function uses a safe evaluation context
+    to mitigate the risks of executing arbitrary code.
+
+    Args:
+        type_string (str): A string representation of a Python type hint. Examples include
+                           'List[int]', 'Dict[str, Any]', 'Optional[List[str]]', etc.
+
+    Returns:
+        typing.Any: The Python type object corresponding to the given type string.
+
+    Raises:
+        ValueError: If the type string contains anything other than the names in the
+                    safe context and the tokens in the safe tokens list.
+
+    The predefined safe context maps the names `Any`, `List`, `Dict`, `Tuple`, `Union` and
+    `Optional` to their `typing` objects and `int`, `str`, `float` and `bool` to the builtin
+    types. The only other tokens allowed in the type string are `[`, `]`, `,` and a space.
+    """
+    safe_context = {
+        "Any": typing.Any,
+        "List": typing.List,
+        "Dict": typing.Dict,
+        "Tuple": typing.Tuple,
+        "Union": typing.Union,
+        "int": int,
+        "str": str,
+        "float": float,
+        "bool": bool,
+        "Optional": typing.Optional,
+    }
+
+    safe_tokens = ["[", "]", ",", " "]
+    return safe_eval(type_string, safe_context, safe_tokens)
+
 
 def isoftype(object, type):
     """Checks if an object is of a certain typing type, including nested types.

diff --git a/src/unitxt/utils.py b/src/unitxt/utils.py
@@ -86,8 +86,28 @@ def is_module_available(module_name):
         return False
 
 
-def safe_eval(expression, context, allowd_tokens):
-    allowd_sub_strings = list(context.keys()) + allowd_tokens
+def safe_eval(expression: str, context: dict, allowed_tokens: list) -> any:
+    """Evaluates a given expression in a restricted environment, allowing only specified tokens and context variables.
+
+    Args:
+        expression (str): The expression to evaluate.
+        context (dict): A dictionary mapping variable names to their values, which
+                        can be used in the expression.
+        allowed_tokens (list): A list of extra substrings (such as brackets, commas, or
+                               operators) that may appear in the expression besides the context keys.
+
+    Returns:
+        any: The result of evaluating the expression.
+
+    Raises:
+        ValueError: If the expression is not made up solely of context keys and allowed tokens.
+
+    Note:
+        This function should be used carefully, as it employs `eval`, which can
+        execute arbitrary code. It mitigates the risk by rejecting any expression not
+        made up of the context keys and allowed tokens, and by emptying `__builtins__`.
+    """
+    allowd_sub_strings = list(context.keys()) + allowed_tokens
     if is_made_of_sub_strings(expression, allowd_sub_strings):
         return eval(expression, {"__builtins__": {}}, context)
     raise ValueError(f"Unreocgnized Expression: {expression}")