From 8d197ba63128a4cdbbb8627e48e7f1b4f150330c Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 6 Jul 2017 15:46:00 -0700 Subject: [PATCH] BUG/MAINT: Change default of inplace to False in pd.eval (#16732) --- doc/source/whatsnew/v0.21.0.txt | 48 +++++++++++++++ pandas/core/computation/eval.py | 87 ++++++++++++++++++--------- pandas/core/frame.py | 13 ++-- pandas/tests/computation/test_eval.py | 50 ++++++++++++--- 4 files changed, 150 insertions(+), 48 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4b97fb83cb13b..95eab9e3b684f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -45,6 +45,52 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Improved error handling during item assignment in pd.eval +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. _whatsnew_0210.api_breaking.pandas_eval: + +:func:`eval` will now raise a ``ValueError`` when item assignment malfunctions, or +inplace operations are specified, but there is no item assignment in the expression (:issue:`16732`) + +.. ipython:: python + + arr = np.array([1, 2, 3]) + +Previously, if you attempted the following expression, you would get a not very helpful error message: + +.. code-block:: ipython + + In [3]: pd.eval("a = 1 + 2", target=arr, inplace=True) + ... + IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) + and integer or boolean arrays are valid indices + +This is a very long way of saying numpy arrays don't support string-item indexing. With this +change, the error message is now this: + +.. code-block:: python + + In [3]: pd.eval("a = 1 + 2", target=arr, inplace=True) + ... + ValueError: Cannot assign expression output to target + +It also used to be possible to evaluate expressions inplace, even if there was no item assignment: + +.. code-block:: ipython + + In [4]: pd.eval("1 + 2", target=arr, inplace=True) + Out[4]: 3 + +However, this input does not make much sense because the output is not being assigned to +the target. Now, a ``ValueError`` will be raised when such an input is passed in: + +.. code-block:: ipython + + In [4]: pd.eval("1 + 2", target=arr, inplace=True) + ... + ValueError: Cannot operate inplace if there is no assignment + - Support has been dropped for Python 3.4 (:issue:`15251`) - The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`) - Accessing a non-existent attribute on a closed :class:`HDFStore` will now @@ -79,6 +125,7 @@ Removal of prior version deprecations/changes - The ``pd.options.display.mpl_style`` configuration has been dropped (:issue:`12190`) - ``Index`` has dropped the ``.sym_diff()`` method in favor of ``.symmetric_difference()`` (:issue:`12591`) - ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) +- :func:`eval` and :method:`DataFrame.eval` have changed the default of ``inplace`` from ``None`` to ``False`` (:issue:`11149`) .. _whatsnew_0210.performance: @@ -145,3 +192,4 @@ Categorical Other ^^^^^ +- Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 22e376306280a..ef15e886fd554 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -3,7 +3,6 @@ """Top level ``eval`` module. """ -import warnings import tokenize from pandas.io.formats.printing import pprint_thing from pandas.core.computation import _NUMEXPR_INSTALLED @@ -148,7 +147,7 @@ def _check_for_locals(expr, stack_level, parser): def eval(expr, parser='pandas', engine=None, truediv=True, local_dict=None, global_dict=None, resolvers=(), level=0, - target=None, inplace=None): + target=None, inplace=False): """Evaluate a Python expression as a string using various backends. The following arithmetic operations are supported: ``+``, ``-``, ``*``, @@ -205,20 +204,40 @@ def eval(expr, parser='pandas', engine=None, truediv=True, level : int, optional The number of prior stack frames to traverse and add to the current scope. Most users will **not** need to change this parameter. - target : a target object for assignment, optional, default is None - essentially this is a passed in resolver - inplace : bool, default True - If expression mutates, whether to modify object inplace or return - copy with mutation. - - WARNING: inplace=None currently falls back to to True, but - in a future version, will default to False. Use inplace=True - explicitly rather than relying on the default. + target : object, optional, default None + This is the target object for assignment. It is used when there is + variable assignment in the expression. If so, then `target` must + support item assignment with string keys, and if a copy is being + returned, it must also support `.copy()`. + inplace : bool, default False + If `target` is provided, and the expression mutates `target`, whether + to modify `target` inplace. Otherwise, return a copy of `target` with + the mutation. Returns ------- ndarray, numeric scalar, DataFrame, Series + Raises + ------ + ValueError + There are many instances where such an error can be raised: + + - `target=None`, but the expression is multiline. + - The expression is multiline, but not all them have item assignment. + An example of such an arrangement is this: + + a = b + 1 + a + 2 + + Here, there are expressions on different lines, making it multiline, + but the last line has no variable assigned to the output of `a + 2`. + - `inplace=True`, but the expression is missing item assignment. + - Item assignment is provided, but the `target` does not support + string item assignment. + - Item assignment is provided and `inplace=False`, but the `target` + does not support the `.copy()` method + Notes ----- The ``dtype`` of any objects involved in an arithmetic ``%`` operation are @@ -232,8 +251,9 @@ def eval(expr, parser='pandas', engine=None, truediv=True, pandas.DataFrame.query pandas.DataFrame.eval """ - inplace = validate_bool_kwarg(inplace, 'inplace') - first_expr = True + + inplace = validate_bool_kwarg(inplace, "inplace") + if isinstance(expr, string_types): _check_expression(expr) exprs = [e.strip() for e in expr.splitlines() if e.strip() != ''] @@ -245,7 +265,10 @@ def eval(expr, parser='pandas', engine=None, truediv=True, raise ValueError("multi-line expressions are only valid in the " "context of data, use DataFrame.eval") + ret = None first_expr = True + target_modified = False + for expr in exprs: expr = _convert_expression(expr) engine = _check_engine(engine) @@ -266,28 +289,33 @@ def eval(expr, parser='pandas', engine=None, truediv=True, eng_inst = eng(parsed_expr) ret = eng_inst.evaluate() - if parsed_expr.assigner is None and multi_line: - raise ValueError("Multi-line expressions are only valid" - " if all expressions contain an assignment") + if parsed_expr.assigner is None: + if multi_line: + raise ValueError("Multi-line expressions are only valid" + " if all expressions contain an assignment") + elif inplace: + raise ValueError("Cannot operate inplace " + "if there is no assignment") # assign if needed if env.target is not None and parsed_expr.assigner is not None: - if inplace is None: - warnings.warn( - "eval expressions containing an assignment currently" - "default to operating inplace.\nThis will change in " - "a future version of pandas, use inplace=True to " - "avoid this warning.", - FutureWarning, stacklevel=3) - inplace = True + target_modified = True # if returning a copy, copy only on the first assignment if not inplace and first_expr: - target = env.target.copy() + try: + target = env.target.copy() + except AttributeError: + raise ValueError("Cannot return a copy of the target") else: target = env.target - target[parsed_expr.assigner] = ret + # TypeError is most commonly raised (e.g. int, list), but you + # get IndexError if you try to do this assignment on np.ndarray. + try: + target[parsed_expr.assigner] = ret + except (TypeError, IndexError): + raise ValueError("Cannot assign expression output to target") if not resolvers: resolvers = ({parsed_expr.assigner: ret},) @@ -304,7 +332,6 @@ def eval(expr, parser='pandas', engine=None, truediv=True, ret = None first_expr = False - if not inplace and inplace is not None: - return target - - return ret + # We want to exclude `inplace=None` as being False. + if inplace is False: + return target if target_modified else ret diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2b2e7be62427b..80cdebc24c39d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2224,7 +2224,7 @@ def query(self, expr, inplace=False, **kwargs): else: return new_data - def eval(self, expr, inplace=None, **kwargs): + def eval(self, expr, inplace=False, **kwargs): """Evaluate an expression in the context of the calling DataFrame instance. @@ -2232,13 +2232,10 @@ def eval(self, expr, inplace=None, **kwargs): ---------- expr : string The expression string to evaluate. - inplace : bool - If the expression contains an assignment, whether to return a new - DataFrame or mutate the existing. - - WARNING: inplace=None currently falls back to to True, but - in a future version, will default to False. Use inplace=True - explicitly rather than relying on the default. + inplace : bool, default False + If the expression contains an assignment, whether to perform the + operation inplace and mutate the existing DataFrame. Otherwise, + a new DataFrame is returned. .. versionadded:: 0.18.0 diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 89ab4531877a4..589f612802fb9 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1311,14 +1311,6 @@ def assignment_not_inplace(self): expected['c'] = expected['a'] + expected['b'] tm.assert_frame_equal(df, expected) - # Default for inplace will change - with tm.assert_produces_warnings(FutureWarning): - df.eval('c = a + b') - - # but don't warn without assignment - with tm.assert_produces_warnings(None): - df.eval('a + b') - def test_multi_line_expression(self): # GH 11149 df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) @@ -1388,14 +1380,52 @@ def test_assignment_in_query(self): df.query('a = 1') assert_frame_equal(df, df_orig) - def query_inplace(self): - # GH 11149 + def test_query_inplace(self): + # see gh-11149 df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) expected = df.copy() expected = expected[expected['a'] == 2] df.query('a == 2', inplace=True) assert_frame_equal(expected, df) + df = {} + expected = {"a": 3} + + self.eval("a = 1 + 2", target=df, inplace=True) + tm.assert_dict_equal(df, expected) + + @pytest.mark.parametrize("invalid_target", [1, "cat", [1, 2], + np.array([]), (1, 3)]) + def test_cannot_item_assign(self, invalid_target): + msg = "Cannot assign expression output to target" + expression = "a = 1 + 2" + + with tm.assert_raises_regex(ValueError, msg): + self.eval(expression, target=invalid_target, inplace=True) + + if hasattr(invalid_target, "copy"): + with tm.assert_raises_regex(ValueError, msg): + self.eval(expression, target=invalid_target, inplace=False) + + @pytest.mark.parametrize("invalid_target", [1, "cat", (1, 3)]) + def test_cannot_copy_item(self, invalid_target): + msg = "Cannot return a copy of the target" + expression = "a = 1 + 2" + + with tm.assert_raises_regex(ValueError, msg): + self.eval(expression, target=invalid_target, inplace=False) + + @pytest.mark.parametrize("target", [1, "cat", [1, 2], + np.array([]), (1, 3), {1: 2}]) + def test_inplace_no_assignment(self, target): + expression = "1 + 2" + + assert self.eval(expression, target=target, inplace=False) == 3 + + msg = "Cannot operate inplace if there is no assignment" + with tm.assert_raises_regex(ValueError, msg): + self.eval(expression, target=target, inplace=True) + def test_basic_period_index_boolean_expression(self): df = mkdf(2, 2, data_gen_f=f, c_idx_type='p', r_idx_type='i')