From 55ababcc35f3458bae3b0268be94fe55c1b81a8b Mon Sep 17 00:00:00 2001 From: rjfs Date: Fri, 27 Dec 2019 02:08:44 +0100 Subject: [PATCH 01/15] BUG: Fixed strange behaviour of pd.DataFrame.drop() with inplace argument (#30484) --- pandas/core/ops/methods.py | 42 ----------------------------------- pandas/tests/base/test_ops.py | 21 ++++++++++++++++++ 2 files changed, 21 insertions(+), 42 deletions(-) diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 8c66eea270c76..4be5bc9ecdd1f 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -83,48 +83,6 @@ def add_special_arithmetic_methods(cls): new_methods = _create_methods( cls, arith_method, comp_method, bool_method, special=True ) - # inplace operators (I feel like these should get passed an `inplace=True` - # or just be removed - - def _wrap_inplace_method(method): - """ - return an inplace wrapper for this method - """ - - def f(self, other): - result = method(self, other) - - # this makes sure that we are aligned like the input - # we are updating inplace so we want to ignore is_copy - self._update_inplace( - result.reindex_like(self, copy=False)._data, verify_is_copy=False - ) - - return self - - f.__name__ = "__i{name}__".format(name=method.__name__.strip("__")) - return f - - new_methods.update( - dict( - __iadd__=_wrap_inplace_method(new_methods["__add__"]), - __isub__=_wrap_inplace_method(new_methods["__sub__"]), - __imul__=_wrap_inplace_method(new_methods["__mul__"]), - __itruediv__=_wrap_inplace_method(new_methods["__truediv__"]), - __ifloordiv__=_wrap_inplace_method(new_methods["__floordiv__"]), - __imod__=_wrap_inplace_method(new_methods["__mod__"]), - __ipow__=_wrap_inplace_method(new_methods["__pow__"]), - ) - ) - - new_methods.update( - dict( - __iand__=_wrap_inplace_method(new_methods["__and__"]), - __ior__=_wrap_inplace_method(new_methods["__or__"]), - __ixor__=_wrap_inplace_method(new_methods["__xor__"]), - ) - ) - _add_methods(cls, new_methods=new_methods) 
diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 4231aa844f282..0f3cb919e90d9 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -767,6 +767,27 @@ def test_drop_duplicates_series_vs_dataframe(self): dropped_series = df[column].drop_duplicates(keep=keep) tm.assert_frame_equal(dropped_frame, dropped_series.to_frame()) + def test_inplace_drop_and_add(self): + # GH 30484 + # Get expected df + expected = pd.DataFrame({}) + expected["x1"] = [1, 2, 3, 4, 5] + expected["x2"] = [0, 0, 0, 1, 1] + expected["target"] = [10, 20, 30, 40, 50] + y = expected["target"] + expected.drop("target", axis=1, inplace=True) + y = y + np.min(y) + # Get tested df + df = pd.DataFrame({}) + df["x1"] = [1, 2, 3, 4, 5] + df["x2"] = [0, 0, 0, 1, 1] + df["target"] = [10, 20, 30, 40, 50] + y = df["target"] + df.drop("target", axis=1, inplace=True) + y += np.min(y) + # compare + tm.assert_frame_equal(df, expected) + def test_fillna(self): # # GH 11343 # though Index.fillna and Series.fillna has separate impl, From 1dc7e7297d93f4ce89e2d0049ab4870a6f7216af Mon Sep 17 00:00:00 2001 From: rjfs Date: Fri, 27 Dec 2019 02:13:27 +0100 Subject: [PATCH 02/15] Revert "DOC: standardize wording for changed default args (#30493)" This reverts commit 710df2140555030e4d86e669d6df2deb852bcaf5. --- doc/source/whatsnew/v1.0.0.rst | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4671170fa79ae..90b970e374a95 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -548,13 +548,13 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
- :meth:`DataFrame.hist` and :meth:`Series.hist` no longer allows ``figsize="default"``, specify figure size by passinig a tuple instead (:issue:`30003`) - Floordiv of integer-dtyped array by :class:`Timedelta` now raises ``TypeError`` (:issue:`21036`) - :class:`TimedeltaIndex` and :class:`DatetimeIndex` no longer accept non-nanosecond dtype strings like "timedelta64" or "datetime64", use "timedelta64[ns]" and "datetime64[ns]" instead (:issue:`24806`) -- Changed the default "skipna" argument in :func:`pandas.api.types.infer_dtype` from ``False`` to ``True`` (:issue:`24050`) +- :func:`pandas.api.types.infer_dtype` argument ``skipna`` defaults to ``True`` instead of ``False`` (:issue:`24050`) - Removed :attr:`Series.ix` and :attr:`DataFrame.ix` (:issue:`26438`) - Removed :meth:`Index.summary` (:issue:`18217`) - Removed the previously deprecated keyword "fastpath" from the :class:`Index` constructor (:issue:`23110`) - Removed :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`) - Removed :meth:`Series.compound` and :meth:`DataFrame.compound` (:issue:`26405`) -- Changed the default "inplace" argument in :meth:`DataFrame.set_index` and :meth:`Series.set_axis` from ``None`` to ``False`` (:issue:`27600`) +- Changed the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. 
It now defaults to ``False`` (:issue:`27600`) - Removed :attr:`Series.cat.categorical`, :attr:`Series.cat.index`, :attr:`Series.cat.name` (:issue:`24751`) - Removed the previously deprecated keyword "box" from :func:`to_datetime` and :func:`to_timedelta`; in addition these now always returns :class:`DatetimeIndex`, :class:`TimedeltaIndex`, :class:`Index`, :class:`Series`, or :class:`DataFrame` (:issue:`24486`) - :func:`to_timedelta`, :class:`Timedelta`, and :class:`TimedeltaIndex` no longer allow "M", "y", or "Y" for the "unit" argument (:issue:`23264`) @@ -594,7 +594,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - :func:`read_excel` no longer allows an integer value for the parameter ``usecols``, instead pass a list of integers from 0 to ``usecols`` inclusive (:issue:`23635`) - Removed the previously deprecated keyword "convert_datetime64" from :meth:`DataFrame.to_records` (:issue:`18902`) - Removed :meth:`IntervalIndex.from_intervals` in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) -- Changed the default "keep_tz" argument in :meth:`DatetimeIndex.to_series` from ``None`` to ``True`` (:issue:`23739`) +- Changed the default value for the "keep_tz" argument in :meth:`DatetimeIndex.to_series` to ``True`` (:issue:`23739`) - Removed :func:`api.types.is_period` and :func:`api.types.is_datetimetz` (:issue:`23917`) - Ability to read pickles containing :class:`Categorical` instances created with pre-0.16 version of pandas has been removed (:issue:`27538`) - Removed :func:`pandas.tseries.plotting.tsplot` (:issue:`18627`) @@ -603,7 +603,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
- Removed the previously deprecated ``FrozenNDArray`` class in ``pandas.core.indexes.frozen`` (:issue:`29335`) - Removed the previously deprecated keyword "nthreads" from :func:`read_feather`, use "use_threads" instead (:issue:`23053`) - Removed :meth:`Index.is_lexsorted_for_tuple` (:issue:`29305`) -- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`29608`) +- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`29608`) - Removed :meth:`Series.valid`; use :meth:`Series.dropna` instead (:issue:`18800`) - Removed :attr:`DataFrame.is_copy`, :attr:`Series.is_copy` (:issue:`18812`) - Removed :meth:`DataFrame.get_ftype_counts`, :meth:`Series.get_ftype_counts` (:issue:`18243`) @@ -615,7 +615,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
- Removed :meth:`DatetimeIndex.asobject`, :meth:`TimedeltaIndex.asobject`, :meth:`PeriodIndex.asobject`, use ``astype(object)`` instead (:issue:`29801`) - Removed the previously deprecated keyword "order" from :func:`factorize` (:issue:`19751`) - Removed the previously deprecated keyword "encoding" from :func:`read_stata` and :meth:`DataFrame.to_stata` (:issue:`21400`) -- Changed the default "sort" argument in :func:`concat` from ``None`` to ``False`` (:issue:`20613`) +- In :func:`concat` the default value for ``sort`` has been changed from ``None`` to ``False`` (:issue:`20613`) - Removed the previously deprecated keyword "raise_conflict" from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`) - Removed the previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`) - Removed the previously deprecated keywords "how", "fill_method", and "limit" from :meth:`DataFrame.resample` (:issue:`30139`) @@ -631,15 +631,16 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - :meth:`Categorical.ravel` returns a :class:`Categorical` instead of a ``ndarray`` (:issue:`27199`) - The 'outer' method on Numpy ufuncs, e.g. 
``np.subtract.outer`` operating on :class:`Series` objects is no longer supported, and will raise ``NotImplementedError`` (:issue:`27198`) - Removed :meth:`Series.get_dtype_counts` and :meth:`DataFrame.get_dtype_counts` (:issue:`27145`) -- Changed the default "fill_value" argument in :meth:`Categorical.take` from ``True`` to ``False`` (:issue:`20841`) -- Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` from ``None`` to ``False`` (:issue:`20584`) +- Changed the default ``fill_value`` in :meth:`Categorical.take` from ``True`` to ``False`` (:issue:`20841`) +- Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, +- :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` to ``False`` (:issue:`20584`) - Removed deprecated behavior of :meth:`Series.argmin` and :meth:`Series.argmax`, use :meth:`Series.idxmin` and :meth:`Series.idxmax` for the old behavior (:issue:`16955`) - Passing a tz-aware ``datetime.datetime`` or :class:`Timestamp` into the :class:`Timestamp` constructor with the ``tz`` argument now raises a ``ValueError`` (:issue:`23621`) - Removed :attr:`Series.base`, :attr:`Index.base`, :attr:`Categorical.base`, :attr:`Series.flags`, :attr:`Index.flags`, :attr:`PeriodArray.flags`, :attr:`Series.strides`, :attr:`Index.strides`, :attr:`Series.itemsize`, :attr:`Index.itemsize`, :attr:`Series.data`, :attr:`Index.data` (:issue:`20721`) - Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``, for the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`) - Removed :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` (:issue:`18164`) - Removed the previously deprecated keyword "errors" in 
:meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`) -- Changed the default "ordered" argument in :class:`CategoricalDtype` from ``None`` to ``False`` (:issue:`26336`) +- Changed the default value for ``ordered`` in :class:`CategoricalDtype` from ``None`` to ``False`` (:issue:`26336`) - :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` now require "labels" as the first argument and "axis" as an optional named parameter (:issue:`30089`) - Removed :func:`to_msgpack`, :func:`read_msgpack`, :meth:`DataFrame.to_msgpack`, :meth:`Series.to_msgpack` (:issue:`27103`) - From 665fa272bd5de726960b9a5af9c70f38b0d86cef Mon Sep 17 00:00:00 2001 From: rjfs Date: Sun, 29 Dec 2019 11:55:42 +0100 Subject: [PATCH 03/15] moved new test to test_operators.py and added new bool argument to NDFrame._maybe_update_cacher --- pandas/core/generic.py | 14 +++++---- pandas/core/ops/methods.py | 44 ++++++++++++++++++++++++++++ pandas/tests/base/test_ops.py | 21 ------------- pandas/tests/frame/test_operators.py | 22 ++++++++++++++ 4 files changed, 75 insertions(+), 26 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 08c7f38ce4c82..30b6aa78abac1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3215,7 +3215,8 @@ def _get_cacher(self): return cacher def _maybe_update_cacher( - self, clear: bool_t = False, verify_is_copy: bool_t = True + self, clear: bool_t = False, verify_is_copy: bool_t = True, + change_cache: bool_t = True ) -> None: """ See if we need to update our parent cacher if clear, then clear our @@ -3227,8 +3228,9 @@ def _maybe_update_cacher( Clear the item cache. verify_is_copy : bool, default True Provide is_copy checks. 
+ change_cache: bool, default True + If True, then cache may be changed """ - cacher = getattr(self, "_cacher", None) if cacher is not None: ref = cacher[1]() @@ -3237,7 +3239,7 @@ def _maybe_update_cacher( # a copy if ref is None: del self._cacher - else: + elif change_cache: # Note: we need to call ref._maybe_cache_changed even in the # case where it will raise. (Uh, not clear why) try: @@ -3931,7 +3933,7 @@ def _drop_axis(self, labels, axis, level=None, errors: str = "raise"): return result - def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: + def _update_inplace(self, result, verify_is_copy: bool_t = True, **kwargs) -> None: """ Replace self internals with result. @@ -3939,6 +3941,8 @@ def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: ---------- verify_is_copy : bool, default True Provide is_copy checks. + **kwargs + Passed to self._maybe_update_cacher """ # NOTE: This does *not* call __finalize__ and that's an explicit # decision that we may revisit in the future. 
@@ -3946,7 +3950,7 @@ def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: self._reset_cache() self._clear_item_cache() self._data = getattr(result, "_data", result) - self._maybe_update_cacher(verify_is_copy=verify_is_copy) + self._maybe_update_cacher(verify_is_copy=verify_is_copy, **kwargs) def add_prefix(self, prefix: str): """ diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 4be5bc9ecdd1f..74ba82ebd3fdf 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -83,6 +83,50 @@ def add_special_arithmetic_methods(cls): new_methods = _create_methods( cls, arith_method, comp_method, bool_method, special=True ) + # inplace operators (I feel like these should get passed an `inplace=True` + # or just be removed + + def _wrap_inplace_method(method): + """ + return an inplace wrapper for this method + """ + + def f(self, other): + result = method(self, other) + + # this makes sure that we are aligned like the input + # we are updating inplace so we want to ignore is_copy + self._update_inplace( + result.reindex_like(self, copy=False)._data, + verify_is_copy=False, + change_cache=False + ) + + return self + + f.__name__ = "__i{name}__".format(name=method.__name__.strip("__")) + return f + + new_methods.update( + dict( + __iadd__=_wrap_inplace_method(new_methods["__add__"]), + __isub__=_wrap_inplace_method(new_methods["__sub__"]), + __imul__=_wrap_inplace_method(new_methods["__mul__"]), + __itruediv__=_wrap_inplace_method(new_methods["__truediv__"]), + __ifloordiv__=_wrap_inplace_method(new_methods["__floordiv__"]), + __imod__=_wrap_inplace_method(new_methods["__mod__"]), + __ipow__=_wrap_inplace_method(new_methods["__pow__"]), + ) + ) + + new_methods.update( + dict( + __iand__=_wrap_inplace_method(new_methods["__and__"]), + __ior__=_wrap_inplace_method(new_methods["__or__"]), + __ixor__=_wrap_inplace_method(new_methods["__xor__"]), + ) + ) + _add_methods(cls, new_methods=new_methods) diff --git 
a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 0f3cb919e90d9..4231aa844f282 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -767,27 +767,6 @@ def test_drop_duplicates_series_vs_dataframe(self): dropped_series = df[column].drop_duplicates(keep=keep) tm.assert_frame_equal(dropped_frame, dropped_series.to_frame()) - def test_inplace_drop_and_add(self): - # GH 30484 - # Get expected df - expected = pd.DataFrame({}) - expected["x1"] = [1, 2, 3, 4, 5] - expected["x2"] = [0, 0, 0, 1, 1] - expected["target"] = [10, 20, 30, 40, 50] - y = expected["target"] - expected.drop("target", axis=1, inplace=True) - y = y + np.min(y) - # Get tested df - df = pd.DataFrame({}) - df["x1"] = [1, 2, 3, 4, 5] - df["x2"] = [0, 0, 0, 1, 1] - df["target"] = [10, 20, 30, 40, 50] - y = df["target"] - df.drop("target", axis=1, inplace=True) - y += np.min(y) - # compare - tm.assert_frame_equal(df, expected) - def test_fillna(self): # # GH 11343 # though Index.fillna and Series.fillna has separate impl, diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index a4f1c0688b144..652f80650655b 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -829,6 +829,28 @@ def test_inplace_ops_identity2(self, op): expected = id(df) assert id(df) == expected + def test_inplace_drop_and_add(self): + # GH 30484 + # Get expected df + expected = pd.DataFrame({}) + expected["x1"] = [1, 2, 3, 4, 5] + expected["x2"] = [0, 0, 0, 1, 1] + expected["target"] = [10, 20, 30, 40, 50] + y_expected = expected["target"] + expected = expected.drop("target", axis=1, inplace=False) + y_expected += np.min(y_expected) + # Get tested df + df = pd.DataFrame({}) + df["x1"] = [1, 2, 3, 4, 5] + df["x2"] = [0, 0, 0, 1, 1] + df["target"] = [10, 20, 30, 40, 50] + y = df["target"] + df.drop("target", axis=1, inplace=True) + y += np.min(y) + # compare + tm.assert_frame_equal(df, expected) + 
tm.assert_series_equal(y, y_expected) + def test_alignment_non_pandas(self): index = ["A", "B", "C"] columns = ["X", "Y", "Z"] From cc5e48090a0cfbf3743b7160eac7e45b18f06a17 Mon Sep 17 00:00:00 2001 From: rjfs Date: Sun, 29 Dec 2019 12:41:06 +0100 Subject: [PATCH 04/15] fixed black formatting --- pandas/core/generic.py | 6 ++++-- pandas/core/ops/methods.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 30b6aa78abac1..71e34db7f9026 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3215,8 +3215,10 @@ def _get_cacher(self): return cacher def _maybe_update_cacher( - self, clear: bool_t = False, verify_is_copy: bool_t = True, - change_cache: bool_t = True + self, + clear: bool_t = False, + verify_is_copy: bool_t = True, + change_cache: bool_t = True, ) -> None: """ See if we need to update our parent cacher if clear, then clear our diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 74ba82ebd3fdf..9c82a510b3e43 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -99,7 +99,7 @@ def f(self, other): self._update_inplace( result.reindex_like(self, copy=False)._data, verify_is_copy=False, - change_cache=False + change_cache=False, ) return self From d2a8b90d4b9dcf28fff7b92441a82910eaad9445 Mon Sep 17 00:00:00 2001 From: rjfs Date: Mon, 27 Jan 2020 22:55:07 +0100 Subject: [PATCH 05/15] moved and refactored new test; implemented new solution by resetting cacher --- pandas/core/ops/methods.py | 3 ++- .../tests/frame/test_axis_select_reindex.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 9c82a510b3e43..267616deb0113 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -93,7 +93,8 @@ def _wrap_inplace_method(method): def f(self, other): result = method(self, other) - + # Delete cacher + self._reset_cacher() # this 
makes sure that we are aligned like the input # we are updating inplace so we want to ignore is_copy self._update_inplace( diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index d6ef3a7600abb..403b7cf0979b0 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -1152,3 +1152,22 @@ def test_drop_non_empty_list(self, index, drop_labels): # GH 21494 with pytest.raises(KeyError, match="not found in axis"): pd.DataFrame(index=index).drop(drop_labels) + + @pytest.mark.parametrize( + "operation", ["__iadd__", "__isub__", "__imul__", "__ipow__"] + ) + @pytest.mark.parametrize("inplace", [False, True]) + def test_inplace_drop_and_operation(self, operation, inplace): + # GH 30484 + data_dict = {"x": [1, 2, 3, 4, 5], "y": [10, 20, 30, 40, 50]} + df = pd.DataFrame(data_dict) + y = df["y"] + + if inplace: + df.drop("y", axis=1, inplace=inplace) + else: + df = df.drop("y", axis=1, inplace=inplace) + # Perform operation and ensure that df is not changed + expected = df.copy() + getattr(y, operation)(1) + tm.assert_frame_equal(df, expected) From f0dbe124fe6b7e71ee650df5f6f993a25bf8918a Mon Sep 17 00:00:00 2001 From: rjfs Date: Mon, 27 Jan 2020 22:59:53 +0100 Subject: [PATCH 06/15] removed some old changes --- pandas/core/ops/methods.py | 4 +--- pandas/tests/frame/test_operators.py | 22 ---------------------- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 018eb24e21d11..0cf1ac4d107f6 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -98,9 +98,7 @@ def f(self, other): # this makes sure that we are aligned like the input # we are updating inplace so we want to ignore is_copy self._update_inplace( - result.reindex_like(self, copy=False)._data, - verify_is_copy=False, - change_cache=False, + result.reindex_like(self, copy=False)._data, verify_is_copy=False ) return 
self diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index e326507b5df6e..55f1216a0efd7 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -850,28 +850,6 @@ def test_inplace_ops_identity2(self, op): expected = id(df) assert id(df) == expected - def test_inplace_drop_and_add(self): - # GH 30484 - # Get expected df - expected = pd.DataFrame({}) - expected["x1"] = [1, 2, 3, 4, 5] - expected["x2"] = [0, 0, 0, 1, 1] - expected["target"] = [10, 20, 30, 40, 50] - y_expected = expected["target"] - expected = expected.drop("target", axis=1, inplace=False) - y_expected += np.min(y_expected) - # Get tested df - df = pd.DataFrame({}) - df["x1"] = [1, 2, 3, 4, 5] - df["x2"] = [0, 0, 0, 1, 1] - df["target"] = [10, 20, 30, 40, 50] - y = df["target"] - df.drop("target", axis=1, inplace=True) - y += np.min(y) - # compare - tm.assert_frame_equal(df, expected) - tm.assert_series_equal(y, y_expected) - def test_alignment_non_pandas(self): index = ["A", "B", "C"] columns = ["X", "Y", "Z"] From 00a5c7fa8b70b3112f7b4c5a7be8225267007afb Mon Sep 17 00:00:00 2001 From: rjfs Date: Mon, 27 Jan 2020 23:02:07 +0100 Subject: [PATCH 07/15] removed some old changes --- pandas/core/generic.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 75e75e56c99d2..02e73c51e6a72 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3150,10 +3150,7 @@ def _get_cacher(self): return cacher def _maybe_update_cacher( - self, - clear: bool_t = False, - verify_is_copy: bool_t = True, - change_cache: bool_t = True, + self, clear: bool_t = False, verify_is_copy: bool_t = True ) -> None: """ See if we need to update our parent cacher if clear, then clear our @@ -3165,9 +3162,8 @@ def _maybe_update_cacher( Clear the item cache. verify_is_copy : bool, default True Provide is_copy checks. 
- change_cache: bool, default True - If True, then cache may be changed """ + cacher = getattr(self, "_cacher", None) if cacher is not None: ref = cacher[1]() @@ -3176,7 +3172,7 @@ def _maybe_update_cacher( # a copy if ref is None: del self._cacher - elif change_cache: + else: # Note: we need to call ref._maybe_cache_changed even in the # case where it will raise. (Uh, not clear why) try: @@ -3894,7 +3890,7 @@ def _drop_axis( return result - def _update_inplace(self, result, verify_is_copy: bool_t = True, **kwargs) -> None: + def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: """ Replace self internals with result. From f33f55f3479edbb2ca7e32efe38368bd76142dc2 Mon Sep 17 00:00:00 2001 From: rjfs Date: Mon, 27 Jan 2020 23:02:59 +0100 Subject: [PATCH 08/15] removed some old changes --- pandas/core/generic.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 02e73c51e6a72..a2e348bf98e33 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3898,8 +3898,6 @@ def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: ---------- verify_is_copy : bool, default True Provide is_copy checks. - **kwargs - Passed to self._maybe_update_cacher """ # NOTE: This does *not* call __finalize__ and that's an explicit # decision that we may revisit in the future. 
@@ -3907,7 +3905,7 @@ def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: self._reset_cache() self._clear_item_cache() self._data = getattr(result, "_data", result) - self._maybe_update_cacher(verify_is_copy=verify_is_copy, **kwargs) + self._maybe_update_cacher(verify_is_copy=verify_is_copy) def add_prefix(self: FrameOrSeries, prefix: str) -> FrameOrSeries: """ From 27cbd51e67f5aca0965c5ddbc45d6287f16efed6 Mon Sep 17 00:00:00 2001 From: rjfs Date: Mon, 27 Jan 2020 23:09:26 +0100 Subject: [PATCH 09/15] Update v1.0.0.rst --- doc/source/whatsnew/v1.0.0.rst | 1180 +++----------------------------- 1 file changed, 80 insertions(+), 1100 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 893308d5eca6d..920919755dc23 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1,35 +1,11 @@ -.. _whatsnew_100: +.. _whatsnew_110: -What's new in 1.0.0 (??) +What's new in 1.1.0 (??) ------------------------ -These are the changes in pandas 1.0.0. See :ref:`release` for a full changelog +These are the changes in pandas 1.1.0. See :ref:`release` for a full changelog including other versions of pandas. -.. note:: - - The pandas 1.0 release removed a lot of functionality that was deprecated - in previous releases (see :ref:`below ` - for an overview). It is recommended to first upgrade to pandas 0.25 and to - ensure your code is working without warnings, before upgrading to pandas - 1.0. - - -New Deprecation Policy -~~~~~~~~~~~~~~~~~~~~~~ - -Starting with Pandas 1.0.0, pandas will adopt a variant of `SemVer`_ to -version releases. Briefly, - -* Deprecations will be introduced in minor releases (e.g. 1.1.0, 1.2.0, 2.1.0, ...) -* Deprecations will be enforced in major releases (e.g. 1.0.0, 2.0.0, 3.0.0, ...) -* API-breaking changes will be made only in major releases (except for experimental features) - -See :ref:`policies.version` for more. - -.. 
_2019 Pandas User Survey: http://dev.pandas.io/pandas-blog/2019-pandas-user-survey.html -.. _SemVer: https://semver.org - {{ header }} .. --------------------------------------------------------------------------- @@ -37,929 +13,81 @@ See :ref:`policies.version` for more. Enhancements ~~~~~~~~~~~~ -.. _whatsnew_100.NA: - -Experimental ``NA`` scalar to denote missing values -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A new ``pd.NA`` value (singleton) is introduced to represent scalar missing -values. Up to now, pandas used several values to represent missing data: ``np.nan`` is used for this for float data, ``np.nan`` or -``None`` for object-dtype data and ``pd.NaT`` for datetime-like data. The -goal of ``pd.NA`` is to provide a "missing" indicator that can be used -consistently across data types. ``pd.NA`` is currently used by the nullable integer and boolean -data types and the new string data type (:issue:`28095`). - -.. warning:: - - Experimental: the behaviour of ``pd.NA`` can still change without warning. - -For example, creating a Series using the nullable integer dtype: - -.. ipython:: python - - s = pd.Series([1, 2, None], dtype="Int64") - s - s[2] - -Compared to ``np.nan``, ``pd.NA`` behaves differently in certain operations. -In addition to arithmetic operations, ``pd.NA`` also propagates as "missing" -or "unknown" in comparison operations: - -.. ipython:: python - - np.nan > 1 - pd.NA > 1 - -For logical operations, ``pd.NA`` follows the rules of the -`three-valued logic `__ (or -*Kleene logic*). For example: - -.. ipython:: python - - pd.NA | True - -For more, see :ref:`NA section ` in the user guide on missing -data. - - -.. _whatsnew_100.string: - -Dedicated string data type -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -We've added :class:`StringDtype`, an extension type dedicated to string data. -Previously, strings were typically stored in object-dtype NumPy arrays. (:issue:`29975`) - -.. 
warning:: - - ``StringDtype`` is currently considered experimental. The implementation - and parts of the API may change without warning. - -The ``'string'`` extension type solves several issues with object-dtype NumPy arrays: - -1. You can accidentally store a *mixture* of strings and non-strings in an - ``object`` dtype array. A ``StringArray`` can only store strings. -2. ``object`` dtype breaks dtype-specific operations like :meth:`DataFrame.select_dtypes`. - There isn't a clear way to select *just* text while excluding non-text, - but still object-dtype columns. -3. When reading code, the contents of an ``object`` dtype array is less clear - than ``string``. - - -.. ipython:: python - - pd.Series(['abc', None, 'def'], dtype=pd.StringDtype()) - -You can use the alias ``"string"`` as well. - -.. ipython:: python - - s = pd.Series(['abc', None, 'def'], dtype="string") - s - -The usual string accessor methods work. Where appropriate, the return type -of the Series or columns of a DataFrame will also have string dtype. - -.. ipython:: python - - s.str.upper() - s.str.split('b', expand=True).dtypes - -String accessor methods returning integers will return a value with :class:`Int64Dtype` - -.. ipython:: python - - s.str.count("a") - -We recommend explicitly using the ``string`` data type when working with strings. -See :ref:`text.types` for more. - -.. _whatsnew_100.boolean: - -Boolean data type with missing values support -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -We've added :class:`BooleanDtype` / :class:`~arrays.BooleanArray`, an extension -type dedicated to boolean data that can hold missing values. The default -``bool`` data type based on a bool-dtype NumPy array, the column can only hold -``True`` or ``False``, and not missing values. This new :class:`~arrays.BooleanArray` -can store missing values as well by keeping track of this in a separate mask. -(:issue:`29555`, :issue:`30095`, :issue:`31131`) - -.. 
ipython:: python - - pd.Series([True, False, None], dtype=pd.BooleanDtype()) - -You can use the alias ``"boolean"`` as well. - -.. ipython:: python - - s = pd.Series([True, False, None], dtype="boolean") - s - -.. _whatsnew_100.convert_dtypes: - -``convert_dtypes`` method to ease use of supported extension dtypes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -In order to encourage use of the extension dtypes ``StringDtype``, -``BooleanDtype``, ``Int64Dtype``, ``Int32Dtype``, etc., that support ``pd.NA``, the -methods :meth:`DataFrame.convert_dtypes` and :meth:`Series.convert_dtypes` -have been introduced. (:issue:`29752`) (:issue:`30929`) +.. _whatsnew_110.period_index_partial_string_slicing: -Example: +Nonmonotonic PeriodIndex Partial String Slicing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. ipython:: python +:class:`PeriodIndex` now supports partial string slicing for non-monotonic indexes, mirroring :class:`DatetimeIndex` behavior (:issue:`31096`) - df = pd.DataFrame({'x': ['abc', None, 'def'], - 'y': [1, 2, np.nan], - 'z': [True, False, True]}) - df - df.dtypes +For example: .. ipython:: python - converted = df.convert_dtypes() - converted - converted.dtypes - -This is especially useful after reading in data using readers such as :func:`read_csv` -and :func:`read_excel`. -See :ref:`here ` for a description. - -.. _whatsnew_100.numba_rolling_apply: - -Using Numba in ``rolling.apply`` and ``expanding.apply`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -We've added an ``engine`` keyword to :meth:`~core.window.rolling.Rolling.apply` and :meth:`~core.window.expanding.Expanding.apply` -that allows the user to execute the routine using `Numba `__ instead of Cython. -Using the Numba engine can yield significant performance gains if the apply function can operate on numpy arrays and -the data set is larger (1 million rows or greater). 
For more details, see -:ref:`rolling apply documentation ` (:issue:`28987`, :issue:`30936`) - -.. _whatsnew_100.custom_window: - -Defining custom windows for rolling operations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -We've added a :func:`pandas.api.indexers.BaseIndexer` class that allows users to define how -window bounds are created during ``rolling`` operations. Users can define their own ``get_window_bounds`` -method on a :func:`pandas.api.indexers.BaseIndexer` subclass that will generate the start and end -indices used for each window during the rolling aggregation. For more details and example usage, see -the :ref:`custom window rolling documentation ` - -.. _whatsnew_100.to_markdown: - -Converting to Markdown -^^^^^^^^^^^^^^^^^^^^^^ - -We've added :meth:`~DataFrame.to_markdown` for creating a markdown table (:issue:`11052`) + dti = pd.date_range("2014-01-01", periods=30, freq="30D") + pi = dti.to_period("D") + ser_monotonic = pd.Series(np.arange(30), index=pi) + shuffler = list(range(0, 30, 2)) + list(range(1, 31, 2)) + ser = ser_monotonic[shuffler] + ser .. ipython:: python - df = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}, index=['a', 'a', 'b']) - print(df.to_markdown()) + ser["2014"] + ser.loc["May 2015"] -.. _whatsnew_100.enhancements.other: +.. 
_whatsnew_110.enhancements.other: Other enhancements ^^^^^^^^^^^^^^^^^^ -- :meth:`DataFrame.to_string` added the ``max_colwidth`` parameter to control when wide columns are truncated (:issue:`9784`) -- Added the ``na_value`` argument to :meth:`Series.to_numpy`, :meth:`Index.to_numpy` and :meth:`DataFrame.to_numpy` to control the value used for missing data (:issue:`30322`) -- :meth:`MultiIndex.from_product` infers level names from inputs if not explicitly provided (:issue:`27292`) -- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`) -- The :ref:`integer dtype ` with support for missing values and the - new :ref:`string dtype ` can now be converted to ``pyarrow`` (>= - 0.15.0), which means that it is supported in writing to the Parquet file - format when using the ``pyarrow`` engine. It is currently not yet supported - when converting back to pandas, so it will become an integer or float - (depending on the presence of missing data) or object dtype column. (:issue:`28368`) -- :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`) -- :meth:`read_stata` can read Stata 119 dta files. 
(:issue:`28250`) -- Implemented :meth:`pandas.core.window.Window.var` and :meth:`pandas.core.window.Window.std` functions (:issue:`26597`) -- Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`) -- Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`) -- :meth:`Styler.background_gradient` now accepts ``vmin`` and ``vmax`` arguments (:issue:`12145`) -- :meth:`Styler.format` added the ``na_rep`` parameter to help format the missing values (:issue:`21527`, :issue:`28358`) -- Roundtripping DataFrames with nullable integer, string and period data types to parquet - (:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine - now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`). -- :func:`read_excel` now can read binary Excel (``.xlsb``) files by passing ``engine='pyxlsb'``. For more details and example usage, see the :ref:`Binary Excel files documentation `. Closes :issue:`8540`. -- The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`) -- :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`) -- :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue:`30270`) -- DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) -- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`) -- :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` have gained ``ignore_index`` keyword to reset index (:issue:`30114`) -- :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`) -- Added new writer for exporting Stata dta files in versions 118 and 119, ``StataWriterUTF8``. 
These files formats support exporting strings containing Unicode characters. Format 119 supports data sets with more than 32,767 variables (:issue:`23573`, :issue:`30959`) -- :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`) -- Added an experimental :attr:`~DataFrame.attrs` for storing global metadata about a dataset (:issue:`29062`) -- :meth:`Timestamp.fromisocalendar` is now compatible with python 3.8 and above (:issue:`28115`) -- :meth:`DataFrame.to_pickle` and :func:`read_pickle` now accept URL (:issue:`30163`) - - -Build Changes -^^^^^^^^^^^^^ - -Pandas has added a `pyproject.toml `_ file and will no longer include -cythonized files in the source distribution uploaded to PyPI (:issue:`28341`, :issue:`20775`). If you're installing -a built distribution (wheel) or via conda, this shouldn't have any effect on you. If you're building pandas from -source, you should no longer need to install Cython into your build environment before calling ``pip install pandas``. +- :class:`Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`) +- +- .. --------------------------------------------------------------------------- -.. _whatsnew_100.api_breaking: - -Backwards incompatible API changes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. _whatsnew_100.api_breaking.MultiIndex._names: - -Avoid using names from ``MultiIndex.levels`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -As part of a larger refactor to :class:`MultiIndex` the level names are now -stored separately from the levels (:issue:`27242`). We recommend using -:attr:`MultiIndex.names` to access the names, and :meth:`Index.set_names` -to update the names. - -For backwards compatibility, you can still *access* the names via the levels. - -.. 
ipython:: python - - mi = pd.MultiIndex.from_product([[1, 2], ['a', 'b']], names=['x', 'y']) - mi.levels[0].name - -However, it is no longer possible to *update* the names of the ``MultiIndex`` -via the level. - -.. ipython:: python - :okexcept: - - mi.levels[0].name = "new name" - mi.names - -To update, use ``MultiIndex.set_names``, which returns a new ``MultiIndex``. - -.. ipython:: python - - mi2 = mi.set_names("new name", level=0) - mi2.names - -New repr for :class:`~pandas.arrays.IntervalArray` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- :class:`pandas.arrays.IntervalArray` adopts a new ``__repr__`` in accordance with other array classes (:issue:`25022`) - -*pandas 0.25.x* - -.. code-block:: ipython - - In [1]: pd.arrays.IntervalArray.from_tuples([(0, 1), (2, 3)]) - Out[2]: - IntervalArray([(0, 1], (2, 3]], - closed='right', - dtype='interval[int64]') - -*pandas 1.0.0* - -.. ipython:: python - - pd.arrays.IntervalArray.from_tuples([(0, 1), (2, 3)]) - -``DataFrame.rename`` now only accepts one positional argument -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- :meth:`DataFrame.rename` would previously accept positional arguments that would lead - to ambiguous or undefined behavior. From pandas 1.0, only the very first argument, which - maps labels to their new names along the default axis, is allowed to be passed by position - (:issue:`29136`). - -*pandas 0.25.x* - -.. code-block:: ipython - - In [1]: df = pd.DataFrame([[1]]) - In [2]: df.rename({0: 1}, {0: 2}) - FutureWarning: ...Use named arguments to resolve ambiguity... - Out[2]: - 2 - 1 1 - -*pandas 1.0.0* - -.. ipython:: python - :okexcept: - - df.rename({0: 1}, {0: 2}) - -Note that errors will now be raised when conflicting or potentially ambiguous arguments are provided. - -*pandas 0.25.x* - -.. 
code-block:: ipython - - In [1]: df.rename({0: 1}, index={0: 2}) - Out[1]: - 0 - 1 1 - - In [2]: df.rename(mapper={0: 1}, index={0: 2}) - Out[2]: - 0 - 2 1 - -*pandas 1.0.0* - -.. ipython:: python - :okexcept: - - df.rename({0: 1}, index={0: 2}) - df.rename(mapper={0: 1}, index={0: 2}) - -You can still change the axis along which the first positional argument is applied by -supplying the ``axis`` keyword argument. - -.. ipython:: python - - df.rename({0: 1}) - df.rename({0: 1}, axis=1) - -If you would like to update both the index and column labels, be sure to use the respective -keywords. - -.. ipython:: python - - df.rename(index={0: 1}, columns={0: 2}) - -Extended verbose info output for :class:`~pandas.DataFrame` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- :meth:`DataFrame.info` now shows line numbers for the columns summary (:issue:`17304`) - -*pandas 0.25.x* - -.. code-block:: python - - >>> df = pd.DataFrame({"int_col": [1, 2, 3], - ... "text_col": ["a", "b", "c"], - ... "float_col": [0.0, 0.1, 0.2]}) - >>> df.info(verbose=True) - - RangeIndex: 3 entries, 0 to 2 - Data columns (total 3 columns): - int_col 3 non-null int64 - text_col 3 non-null object - float_col 3 non-null float64 - dtypes: float64(1), int64(1), object(1) - memory usage: 152.0+ bytes - -*pandas 1.0.0* - -.. ipython:: python - - df = pd.DataFrame({"int_col": [1, 2, 3], - "text_col": ["a", "b", "c"], - "float_col": [0.0, 0.1, 0.2]}) - df.info(verbose=True) - -:meth:`pandas.array` inference changes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -:meth:`pandas.array` now infers pandas' new extension types in several cases (:issue:`29791`): - -1. String data (including missing values) now returns a :class:`arrays.StringArray`. -2. Integer data (including missing values) now returns a :class:`arrays.IntegerArray`. -3. Boolean data (including missing values) now returns the new :class:`arrays.BooleanArray` - -*pandas 0.25.x* - -.. 
code-block:: python - - >>> pd.array(["a", None]) - - ['a', None] - Length: 2, dtype: object - - >>> pd.array([1, None]) - - [1, None] - Length: 2, dtype: object - - -*pandas 1.0.0* - -.. ipython:: python - - pd.array(["a", None]) - pd.array([1, None]) - -As a reminder, you can specify the ``dtype`` to disable all inference. - -:class:`arrays.IntegerArray` now uses :attr:`pandas.NA` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -:class:`arrays.IntegerArray` now uses :attr:`pandas.NA` rather than -:attr:`numpy.nan` as its missing value marker (:issue:`29964`). - -*pandas 0.25.x* - -.. code-block:: python - - >>> a = pd.array([1, 2, None], dtype="Int64") - >>> a - - [1, 2, NaN] - Length: 3, dtype: Int64 - - >>> a[2] - nan - -*pandas 1.0.0* - -.. ipython:: python - - a = pd.array([1, 2, None], dtype="Int64") - a - a[2] - -This has a few API-breaking consequences. - -**Converting to a NumPy ndarray** - -When converting to a NumPy array missing values will be ``pd.NA``, which cannot -be converted to a float. So calling ``np.asarray(integer_array, dtype="float")`` -will now raise. - -*pandas 0.25.x* - -.. code-block:: python - - >>> np.asarray(a, dtype="float") - array([ 1., 2., nan]) - -*pandas 1.0.0* - -.. ipython:: python - :okexcept: - - np.asarray(a, dtype="float") - -Use :meth:`arrays.IntegerArray.to_numpy` with an explicit ``na_value`` instead. - -.. ipython:: python - - a.to_numpy(dtype="float", na_value=np.nan) - -**Reductions can return ``pd.NA``** - -When performing a reduction such as a sum with ``skipna=False``, the result -will now be ``pd.NA`` instead of ``np.nan`` in presence of missing values -(:issue:`30958`). - -*pandas 0.25.x* - -.. code-block:: python - - >>> pd.Series(a).sum(skipna=False) - nan - -*pandas 1.0.0* - -.. ipython:: python - - pd.Series(a).sum(skipna=False) - -**value_counts returns a nullable integer dtype** - -:meth:`Series.value_counts` with a nullable integer dtype now returns a nullable -integer dtype for the values. 
- -*pandas 0.25.x* - -.. code-block:: python - - >>> pd.Series([2, 1, 1, None], dtype="Int64").value_counts().dtype - dtype('int64') - -*pandas 1.0.0* - -.. ipython:: python - - pd.Series([2, 1, 1, None], dtype="Int64").value_counts().dtype - -See :ref:`missing_data.NA` for more on the differences between :attr:`pandas.NA` -and :attr:`numpy.nan`. - -:class:`arrays.IntegerArray` comparisons return :class:`arrays.BooleanArray` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Comparison operations on a :class:`arrays.IntegerArray` now returns a -:class:`arrays.BooleanArray` rather than a NumPy array (:issue:`29964`). - -*pandas 0.25.x* - -.. code-block:: python - - >>> a = pd.array([1, 2, None], dtype="Int64") - >>> a - - [1, 2, NaN] - Length: 3, dtype: Int64 - - >>> a > 1 - array([False, True, False]) - -*pandas 1.0.0* - -.. ipython:: python - - a = pd.array([1, 2, None], dtype="Int64") - a > 1 - -Note that missing values now propagate, rather than always comparing unequal -like :attr:`numpy.nan`. See :ref:`missing_data.NA` for more. - -By default :meth:`Categorical.min` now returns the minimum instead of np.nan -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When :class:`Categorical` contains ``np.nan``, -:meth:`Categorical.min` no longer return ``np.nan`` by default (skipna=True) (:issue:`25303`) - -*pandas 0.25.x* - -.. code-block:: ipython - - In [1]: pd.Categorical([1, 2, np.nan], ordered=True).min() - Out[1]: nan - - -*pandas 1.0.0* - -.. ipython:: python - - pd.Categorical([1, 2, np.nan], ordered=True).min() - - -Default dtype of empty :class:`pandas.Series` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Initialising an empty :class:`pandas.Series` without specifying a dtype will raise a `DeprecationWarning` now -(:issue:`17261`). 
The default dtype will change from ``float64`` to ``object`` in future releases so that it is -consistent with the behaviour of :class:`DataFrame` and :class:`Index`. - -*pandas 1.0.0* - -.. code-block:: ipython - - In [1]: pd.Series() - Out[2]: - DeprecationWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning. - Series([], dtype: float64) - -.. _whatsnew_100.api_breaking.python: - -Increased minimum version for Python -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Pandas 1.0.0 supports Python 3.6.1 and higher (:issue:`29212`). - -.. _whatsnew_100.api_breaking.deps: - -Increased minimum versions for dependencies -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Some minimum supported versions of dependencies were updated (:issue:`29766`, :issue:`29723`). -If installed, we now require: - -+-----------------+-----------------+----------+---------+ -| Package | Minimum Version | Required | Changed | -+=================+=================+==========+=========+ -| numpy | 1.13.3 | X | | -+-----------------+-----------------+----------+---------+ -| pytz | 2015.4 | X | | -+-----------------+-----------------+----------+---------+ -| python-dateutil | 2.6.1 | X | | -+-----------------+-----------------+----------+---------+ -| bottleneck | 1.2.1 | | | -+-----------------+-----------------+----------+---------+ -| numexpr | 2.6.2 | | | -+-----------------+-----------------+----------+---------+ -| pytest (dev) | 4.0.2 | | | -+-----------------+-----------------+----------+---------+ - -For `optional libraries `_ the general recommendation is to use the latest version. -The following table lists the lowest version per library that is currently being tested throughout the development of pandas. -Optional libraries below the lowest tested version may still work, but are not considered supported. 
- -+-----------------+-----------------+---------+ -| Package | Minimum Version | Changed | -+=================+=================+=========+ -| beautifulsoup4 | 4.6.0 | | -+-----------------+-----------------+---------+ -| fastparquet | 0.3.2 | X | -+-----------------+-----------------+---------+ -| gcsfs | 0.2.2 | | -+-----------------+-----------------+---------+ -| lxml | 3.8.0 | | -+-----------------+-----------------+---------+ -| matplotlib | 2.2.2 | | -+-----------------+-----------------+---------+ -| numba | 0.46.0 | X | -+-----------------+-----------------+---------+ -| openpyxl | 2.5.7 | X | -+-----------------+-----------------+---------+ -| pyarrow | 0.13.0 | X | -+-----------------+-----------------+---------+ -| pymysql | 0.7.1 | | -+-----------------+-----------------+---------+ -| pytables | 3.4.2 | | -+-----------------+-----------------+---------+ -| s3fs | 0.3.0 | X | -+-----------------+-----------------+---------+ -| scipy | 0.19.0 | | -+-----------------+-----------------+---------+ -| sqlalchemy | 1.1.4 | | -+-----------------+-----------------+---------+ -| xarray | 0.8.2 | | -+-----------------+-----------------+---------+ -| xlrd | 1.1.0 | | -+-----------------+-----------------+---------+ -| xlsxwriter | 0.9.8 | | -+-----------------+-----------------+---------+ -| xlwt | 1.2.0 | | -+-----------------+-----------------+---------+ - -See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. - -.. _whatsnew_100.api.other: +.. 
_whatsnew_110.api.other: Other API changes ^^^^^^^^^^^^^^^^^ -- Bumped the minimum supported version of ``s3fs`` from 0.0.8 to 0.3.0 (:issue:`28616`) -- :class:`core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`) -- :meth:`pandas.api.types.infer_dtype` will now return "integer-na" for integer and ``np.nan`` mix (:issue:`27283`) -- :meth:`MultiIndex.from_arrays` will no longer infer names from arrays if ``names=None`` is explicitly provided (:issue:`27292`) -- In order to improve tab-completion, Pandas does not include most deprecated attributes when introspecting a pandas object using ``dir`` (e.g. ``dir(df)``). - To see which attributes are excluded, see an object's ``_deprecations`` attribute, for example ``pd.DataFrame._deprecations`` (:issue:`28805`). -- The returned dtype of ::func:`pd.unique` now matches the input dtype. (:issue:`27874`) -- Changed the default configuration value for ``options.matplotlib.register_converters`` from ``True`` to ``"auto"`` (:issue:`18720`). - Now, pandas custom formatters will only be applied to plots created by pandas, through :meth:`~DataFrame.plot`. - Previously, pandas' formatters would be applied to all plots created *after* a :meth:`~DataFrame.plot`. - See :ref:`units registration ` for more. -- :meth:`Series.dropna` has dropped its ``**kwargs`` argument in favor of a single ``how`` parameter. - Supplying anything else than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`) -- When testing pandas, the new minimum required version of pytest is 5.0.1 (:issue:`29664`) -- :meth:`Series.str.__iter__` was deprecated and will be removed in future releases (:issue:`28277`). -- Added ```` to the list of default NA values for :meth:`read_csv` (:issue:`30821`) - -.. _whatsnew_100.api.documentation: - -Documentation Improvements -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- Added new section on :ref:`scale` (:issue:`28315`). 
-- Added sub-section on :ref:`io.query_multi` for HDF5 datasets (:issue:`28791`). +- :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes; statistics ``first`` and ``last`` + will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`) +- + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- :meth:`DataFrame.swaplevel` now raises a ``TypeError`` if the axis is not a :class:`MultiIndex`. + Previously an ``AttributeError`` was raised (:issue:`31126`) + .. --------------------------------------------------------------------------- -.. _whatsnew_100.deprecations: +.. _whatsnew_110.deprecations: Deprecations ~~~~~~~~~~~~ -- :meth:`Series.item` and :meth:`Index.item` have been _undeprecated_ (:issue:`29250`) -- ``Index.set_value`` has been deprecated. For a given index ``idx``, array ``arr``, - value in ``idx`` of ``idx_val`` and a new value of ``val``, ``idx.set_value(arr, idx_val, val)`` - is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`).
-- :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`) -- :func:`eval` keyword argument "truediv" is deprecated and will be removed in a future version (:issue:`29812`) -- :meth:`DateOffset.isAnchored` and :meth:`DatetOffset.onOffset` are deprecated and will be removed in a future version, use :meth:`DateOffset.is_anchored` and :meth:`DateOffset.is_on_offset` instead (:issue:`30340`) -- ``pandas.tseries.frequencies.get_offset`` is deprecated and will be removed in a future version, use ``pandas.tseries.frequencies.to_offset`` instead (:issue:`4205`) -- :meth:`Categorical.take_nd` and :meth:`CategoricalIndex.take_nd` are deprecated, use :meth:`Categorical.take` and :meth:`CategoricalIndex.take` instead (:issue:`27745`) -- The parameter ``numeric_only`` of :meth:`Categorical.min` and :meth:`Categorical.max` is deprecated and replaced with ``skipna`` (:issue:`25303`) -- The parameter ``label`` in :func:`lreshape` has been deprecated and will be removed in a future version (:issue:`29742`) -- ``pandas.core.index`` has been deprecated and will be removed in a future version, the public classes are available in the top-level namespace (:issue:`19711`) -- :func:`pandas.json_normalize` is now exposed in the top-level namespace. - Usage of ``json_normalize`` as ``pandas.io.json.json_normalize`` is now deprecated and - it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`). -- The ``numpy`` argument of :meth:`pandas.read_json` is deprecated (:issue:`28512`). -- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`) -- The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`) -- The ``pandas.util.testing`` module has been deprecated. 
Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`). -- ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`) -- The parameter ``is_copy`` of :meth:`Series.take` and :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`) -- Support for multi-dimensional indexing (e.g. ``index[:, None]``) on a :class:`Index` is deprecated and will be removed in a future version, convert to a numpy array before indexing instead (:issue:`30588`) -- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`) -- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30610`) -- :class:`~DataFrame.diff` will raise a ``TypeError`` rather than implicitly losing the dtype of extension types in the future. Convert to the correct dtype before calling ``diff`` instead (:issue:`31025`) - -**Selecting Columns from a Grouped DataFrame** - -When selecting columns from a :class:`DataFrameGroupBy` object, passing individual keys (or a tuple of keys) inside single brackets is deprecated, -a list of items should be used instead. (:issue:`23566`) For example: - -.. code-block:: ipython - - df = pd.DataFrame({ - "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], - "B": np.random.randn(8), - "C": np.random.randn(8), - }) - g = df.groupby('A') - - # single key, returns SeriesGroupBy - g['B'] - - # tuple of single key, returns SeriesGroupBy - g[('B',)] - - # tuple of multiple keys, returns DataFrameGroupBy, raises FutureWarning - g[('B', 'C')] - - # multiple keys passed directly, returns DataFrameGroupBy, raises FutureWarning - # (implicitly converts the passed strings into a single tuple) - g['B', 'C'] - - # proper way, returns DataFrameGroupBy - g[['B', 'C']] - -.. --------------------------------------------------------------------------- - -.. 
_whatsnew_100.prior_deprecations: - -Removal of prior version deprecations/changes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -**Removed SparseSeries and SparseDataFrame** - -``SparseSeries``, ``SparseDataFrame`` and the ``DataFrame.to_sparse`` method -have been removed (:issue:`28425`). We recommend using a ``Series`` or -``DataFrame`` with sparse values instead. See :ref:`sparse.migration` for help -with migrating existing code. - -.. _whatsnew_100.matplotlib_units: - -**Matplotlib unit registration** - -Previously, pandas would register converters with matplotlib as a side effect of importing pandas (:issue:`18720`). -This changed the output of plots made via matplotlib plots after pandas was imported, even if you were using -matplotlib directly rather than :meth:`~DataFrame.plot`. - -To use pandas formatters with a matplotlib plot, specify - -.. code-block:: python - - >>> import pandas as pd - >>> pd.options.plotting.matplotlib.register_converters = True - -Note that plots created by :meth:`DataFrame.plot` and :meth:`Series.plot` *do* register the converters -automatically. The only behavior change is when plotting a date-like object via ``matplotlib.pyplot.plot`` -or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
- -**Other removals** - -- Removed the previously deprecated keyword "index" from :func:`read_stata`, :class:`StataReader`, and :meth:`StataReader.read`, use "index_col" instead (:issue:`17328`) -- Removed ``StataReader.data`` method, use :meth:`StataReader.read` instead (:issue:`9493`) -- Removed ``pandas.plotting._matplotlib.tsplot``, use :meth:`Series.plot` instead (:issue:`19980`) -- ``pandas.tseries.converter.register`` has been moved to :func:`pandas.plotting.register_matplotlib_converters` (:issue:`18307`) -- :meth:`Series.plot` no longer accepts positional arguments, pass keyword arguments instead (:issue:`30003`) -- :meth:`DataFrame.hist` and :meth:`Series.hist` no longer allows ``figsize="default"``, specify figure size by passinig a tuple instead (:issue:`30003`) -- Floordiv of integer-dtyped array by :class:`Timedelta` now raises ``TypeError`` (:issue:`21036`) -- :class:`TimedeltaIndex` and :class:`DatetimeIndex` no longer accept non-nanosecond dtype strings like "timedelta64" or "datetime64", use "timedelta64[ns]" and "datetime64[ns]" instead (:issue:`24806`) -- :func:`pandas.api.types.infer_dtype` argument ``skipna`` defaults to ``True`` instead of ``False`` (:issue:`24050`) -- Removed :attr:`Series.ix` and :attr:`DataFrame.ix` (:issue:`26438`) -- Removed :meth:`Index.summary` (:issue:`18217`) -- Removed the previously deprecated keyword "fastpath" from the :class:`Index` constructor (:issue:`23110`) -- Removed :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`) -- Removed :meth:`Series.compound` and :meth:`DataFrame.compound` (:issue:`26405`) -- Changed the the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. 
It now defaults to ``False`` (:issue:`27600`) -- Removed :attr:`Series.cat.categorical`, :attr:`Series.cat.index`, :attr:`Series.cat.name` (:issue:`24751`) -- Removed the previously deprecated keyword "box" from :func:`to_datetime` and :func:`to_timedelta`; in addition these now always returns :class:`DatetimeIndex`, :class:`TimedeltaIndex`, :class:`Index`, :class:`Series`, or :class:`DataFrame` (:issue:`24486`) -- :func:`to_timedelta`, :class:`Timedelta`, and :class:`TimedeltaIndex` no longer allow "M", "y", or "Y" for the "unit" argument (:issue:`23264`) -- Removed the previously deprecated keyword "time_rule" from (non-public) ``offsets.generate_range``, which has been moved to :func:`core.arrays._ranges.generate_range` (:issue:`24157`) -- :meth:`DataFrame.loc` or :meth:`Series.loc` with listlike indexers and missing labels will no longer reindex (:issue:`17295`) -- :meth:`DataFrame.to_excel` and :meth:`Series.to_excel` with non-existent columns will no longer reindex (:issue:`17295`) -- Removed the previously deprecated keyword "join_axes" from :func:`concat`; use ``reindex_like`` on the result instead (:issue:`22318`) -- Removed the previously deprecated keyword "by" from :meth:`DataFrame.sort_index`, use :meth:`DataFrame.sort_values` instead (:issue:`10726`) -- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`core.groupby.DataFrameGroupBy.aggregate`, :meth:`core.groupby.SeriesGroupBy.aggregate`, :meth:`core.window.rolling.Rolling.aggregate` (:issue:`18529`) -- Passing ``datetime64`` data to :class:`TimedeltaIndex` or ``timedelta64`` data to ``DatetimeIndex`` now raises ``TypeError`` (:issue:`23539`, :issue:`23937`) -- Passing ``int64`` values to :class:`DatetimeIndex` and a timezone now interprets the values as nanosecond timestamps in UTC, not wall times in the given timezone (:issue:`24559`) -- A tuple passed to :meth:`DataFrame.groupby` is now exclusively treated as a single key (:issue:`18314`) -- 
Removed ``Index.contains``, use ``key in index`` instead (:issue:`30103`) -- Addition and subtraction of ``int`` or integer-arrays is no longer allowed in :class:`Timestamp`, :class:`DatetimeIndex`, :class:`TimedeltaIndex`, use ``obj + n * obj.freq`` instead of ``obj + n`` (:issue:`22535`) -- Removed ``Series.ptp`` (:issue:`21614`) -- Removed ``Series.from_array`` (:issue:`18258`) -- Removed ``DataFrame.from_items`` (:issue:`18458`) -- Removed ``DataFrame.as_matrix``, ``Series.as_matrix`` (:issue:`18458`) -- Removed ``Series.asobject`` (:issue:`18477`) -- Removed ``DataFrame.as_blocks``, ``Series.as_blocks``, ``DataFrame.blocks``, ``Series.blocks`` (:issue:`17656`) -- :meth:`pandas.Series.str.cat` now defaults to aligning ``others``, using ``join='left'`` (:issue:`27611`) -- :meth:`pandas.Series.str.cat` does not accept list-likes *within* list-likes anymore (:issue:`27611`) -- :meth:`Series.where` with ``Categorical`` dtype (or :meth:`DataFrame.where` with ``Categorical`` column) no longer allows setting new categories (:issue:`24114`) -- Removed the previously deprecated keywords "start", "end", and "periods" from the :class:`DatetimeIndex`, :class:`TimedeltaIndex`, and :class:`PeriodIndex` constructors; use :func:`date_range`, :func:`timedelta_range`, and :func:`period_range` instead (:issue:`23919`) -- Removed the previously deprecated keyword "verify_integrity" from the :class:`DatetimeIndex` and :class:`TimedeltaIndex` constructors (:issue:`23919`) -- Removed the previously deprecated keyword "fastpath" from ``pandas.core.internals.blocks.make_block`` (:issue:`19265`) -- Removed the previously deprecated keyword "dtype" from :meth:`Block.make_block_same_class` (:issue:`19434`) -- Removed ``ExtensionArray._formatting_values``. Use :attr:`ExtensionArray._formatter` instead. 
(:issue:`23601`) -- Removed ``MultiIndex.to_hierarchical`` (:issue:`21613`) -- Removed ``MultiIndex.labels``, use :attr:`MultiIndex.codes` instead (:issue:`23752`) -- Removed the previously deprecated keyword "labels" from the :class:`MultiIndex` constructor, use "codes" instead (:issue:`23752`) -- Removed ``MultiIndex.set_labels``, use :meth:`MultiIndex.set_codes` instead (:issue:`23752`) -- Removed the previously deprecated keyword "labels" from :meth:`MultiIndex.set_codes`, :meth:`MultiIndex.copy`, :meth:`MultiIndex.drop`, use "codes" instead (:issue:`23752`) -- Removed support for legacy HDF5 formats (:issue:`29787`) -- Passing a dtype alias (e.g. 'datetime64[ns, UTC]') to :class:`DatetimeTZDtype` is no longer allowed, use :meth:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`) -- Removed the previously deprecated keyword "skip_footer" from :func:`read_excel`; use "skipfooter" instead (:issue:`18836`) -- :func:`read_excel` no longer allows an integer value for the parameter ``usecols``, instead pass a list of integers from 0 to ``usecols`` inclusive (:issue:`23635`) -- Removed the previously deprecated keyword "convert_datetime64" from :meth:`DataFrame.to_records` (:issue:`18902`) -- Removed :meth:`IntervalIndex.from_intervals` in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) -- Changed the default value for the "keep_tz" argument in :meth:`DatetimeIndex.to_series` to ``True`` (:issue:`23739`) -- Removed :func:`api.types.is_period` and :func:`api.types.is_datetimetz` (:issue:`23917`) -- Ability to read pickles containing :class:`Categorical` instances created with pre-0.16 version of pandas has been removed (:issue:`27538`) -- Removed ``pandas.tseries.plotting.tsplot`` (:issue:`18627`) -- Removed the previously deprecated keywords "reduce" and "broadcast" from :meth:`DataFrame.apply` (:issue:`18577`) -- Removed the previously deprecated ``assert_raises_regex`` function in ``pandas._testing`` (:issue:`29174`) -- Removed the 
previously deprecated ``FrozenNDArray`` class in ``pandas.core.indexes.frozen`` (:issue:`29335`) -- Removed the previously deprecated keyword "nthreads" from :func:`read_feather`, use "use_threads" instead (:issue:`23053`) -- Removed :meth:`Index.is_lexsorted_for_tuple` (:issue:`29305`) -- Removed support for nexted renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`29608`) -- Removed :meth:`Series.valid`; use :meth:`Series.dropna` instead (:issue:`18800`) -- Removed :attr:`DataFrame.is_copy`, :attr:`Series.is_copy` (:issue:`18812`) -- Removed :meth:`DataFrame.get_ftype_counts`, :meth:`Series.get_ftype_counts` (:issue:`18243`) -- Removed :meth:`DataFrame.ftypes`, :meth:`Series.ftypes`, :meth:`Series.ftype` (:issue:`26744`) -- Removed :meth:`Index.get_duplicates`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`) -- Removed :meth:`Series.clip_upper`, :meth:`Series.clip_lower`, :meth:`DataFrame.clip_upper`, :meth:`DataFrame.clip_lower` (:issue:`24203`) -- Removed the ability to alter :attr:`DatetimeIndex.freq`, :attr:`TimedeltaIndex.freq`, or :attr:`PeriodIndex.freq` (:issue:`20772`) -- Removed ``DatetimeIndex.offset`` (:issue:`20730`) -- Removed ``DatetimeIndex.asobject``, ``TimedeltaIndex.asobject``, ``PeriodIndex.asobject``, use ``astype(object)`` instead (:issue:`29801`) -- Removed the previously deprecated keyword "order" from :func:`factorize` (:issue:`19751`) -- Removed the previously deprecated keyword "encoding" from :func:`read_stata` and :meth:`DataFrame.to_stata` (:issue:`21400`) -- In :func:`concat` the default value for ``sort`` has been changed from ``None`` to ``False`` (:issue:`20613`) -- Removed the previously deprecated keyword "raise_conflict" from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`) -- Removed the previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, 
:meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`) -- Removed the previously deprecated keywords "how", "fill_method", and "limit" from :meth:`DataFrame.resample` (:issue:`30139`) -- Passing an integer to :meth:`Series.fillna` or :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype now raises ``TypeError`` (:issue:`24694`) -- Passing multiple axes to :meth:`DataFrame.dropna` is no longer supported (:issue:`20995`) -- Removed ``Series.nonzero``, use ``to_numpy().nonzero()`` instead (:issue:`24048`) -- Passing floating dtype ``codes`` to :meth:`Categorical.from_codes` is no longer supported, pass ``codes.astype(np.int64)`` instead (:issue:`21775`) -- Removed the previously deprecated keyword "pat" from :meth:`Series.str.partition` and :meth:`Series.str.rpartition`, use "sep" instead (:issue:`23767`) -- Removed ``Series.put`` (:issue:`27106`) -- Removed ``Series.real``, ``Series.imag`` (:issue:`27106`) -- Removed ``Series.to_dense``, ``DataFrame.to_dense`` (:issue:`26684`) -- Removed ``Index.dtype_str``, use ``str(index.dtype)`` instead (:issue:`27106`) -- :meth:`Categorical.ravel` returns a :class:`Categorical` instead of a ``ndarray`` (:issue:`27199`) -- The 'outer' method on Numpy ufuncs, e.g. 
``np.subtract.outer`` operating on :class:`Series` objects is no longer supported, and will raise ``NotImplementedError`` (:issue:`27198`) -- Removed :meth:`Series.get_dtype_counts` and :meth:`DataFrame.get_dtype_counts` (:issue:`27145`) -- Changed the default ``fill_value`` in :meth:`Categorical.take` from ``True`` to ``False`` (:issue:`20841`) -- Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, -- :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` to ``False`` (:issue:`20584`) -- Removed deprecated behavior of :meth:`Series.argmin` and :meth:`Series.argmax`, use :meth:`Series.idxmin` and :meth:`Series.idxmax` for the old behavior (:issue:`16955`) -- Passing a tz-aware ``datetime.datetime`` or :class:`Timestamp` into the :class:`Timestamp` constructor with the ``tz`` argument now raises a ``ValueError`` (:issue:`23621`) -- Removed ``Series.base``, ``Index.base``, ``Categorical.base``, ``Series.flags``, ``Index.flags``, ``PeriodArray.flags``, ``Series.strides``, ``Index.strides``, ``Series.itemsize``, ``Index.itemsize``, ``Series.data``, ``Index.data`` (:issue:`20721`) -- Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``, for the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`) -- Removed ``Timestamp.weekday_name``, ``DatetimeIndex.weekday_name``, and ``Series.dt.weekday_name`` (:issue:`18164`) -- Removed the previously deprecated keyword "errors" in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`) -- Changed the default value for ``ordered`` in :class:`CategoricalDtype` from ``None`` to ``False`` (:issue:`26336`) -- :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` now require "labels" as the first argument and "axis" as an optional named parameter (:issue:`30089`) -- Removed ``to_msgpack``, 
``read_msgpack``, ``DataFrame.to_msgpack``, ``Series.to_msgpack`` (:issue:`27103`) -- Removed ``Series.compress`` (:issue:`21930`) -- Removed the previously deprecated keyword "fill_value" from :meth:`Categorical.fillna`, use "value" instead (:issue:`19269`) -- Removed the previously deprecated keyword "data" from :func:`andrews_curves`, use "frame" instead (:issue:`6956`) -- Removed the previously deprecated keyword "data" from :func:`parallel_coordinates`, use "frame" instead (:issue:`6956`) -- Removed the previously deprecated keyword "colors" from :func:`parallel_coordinates`, use "color" instead (:issue:`6956`) -- Removed the previously deprecated keywords "verbose" and "private_key" from :func:`read_gbq` (:issue:`30200`) -- Calling ``np.array`` and ``np.asarray`` on tz-aware :class:`Series` and :class:`DatetimeIndex` will now return an object array of tz-aware :class:`Timestamp` (:issue:`24596`) +- - .. --------------------------------------------------------------------------- -.. _whatsnew_100.performance: + +.. 
_whatsnew_110.performance: Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- Performance improvement in :class:`DataFrame` arithmetic and comparison operations with scalars (:issue:`24990`, :issue:`29853`) -- Performance improvement in indexing with a non-unique :class:`IntervalIndex` (:issue:`27489`) -- Performance improvement in :attr:`MultiIndex.is_monotonic` (:issue:`27495`) -- Performance improvement in :func:`cut` when ``bins`` is an :class:`IntervalIndex` (:issue:`27668`) -- Performance improvement when initializing a :class:`DataFrame` using a ``range`` (:issue:`30171`) -- Performance improvement in :meth:`DataFrame.corr` when ``method`` is ``"spearman"`` (:issue:`28139`) -- Performance improvement in :meth:`DataFrame.replace` when provided a list of values to replace (:issue:`28099`) -- Performance improvement in :meth:`DataFrame.select_dtypes` by using vectorization instead of iterating over a loop (:issue:`28317`) -- Performance improvement in :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` (:issue:`28795`) -- Performance improvement when comparing a :class:`Categorical` with a scalar and the scalar is not found in the categories (:issue:`29750`) -- Performance improvement when checking if values in a :class:`Categorical` are equal, equal or larger or larger than a given scalar. - The improvement is not present if checking if the :class:`Categorical` is less than or less than or equal than the scalar (:issue:`29820`) -- Performance improvement in :meth:`Index.equals` and :meth:`MultiIndex.equals` (:issue:`29134`) -- Performance improvement in :func:`~pandas.api.types.infer_dtype` when ``skipna`` is ``True`` (:issue:`28814`) +- Performance improvement in :class:`Timedelta` constructor (:issue:`30543`) +- Performance improvement in :class:`Timestamp` constructor (:issue:`30543`) +- +- .. --------------------------------------------------------------------------- -.. _whatsnew_100.bug_fixes: +.. 
_whatsnew_110.bug_fixes: Bug fixes ~~~~~~~~~ @@ -968,61 +96,19 @@ Bug fixes Categorical ^^^^^^^^^^^ -- Added test to assert the :func:`fillna` raises the correct ``ValueError`` message when the value isn't a value from categories (:issue:`13628`) -- Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) -- :meth:`DataFrame.reindex` with a :class:`CategoricalIndex` would fail when the targets contained duplicates, and wouldn't fail if the source contained duplicates (:issue:`28107`) -- Bug in :meth:`Categorical.astype` not allowing for casting to extension dtypes (:issue:`28668`) -- Bug where :func:`merge` was unable to join on categorical and extension dtype columns (:issue:`28668`) -- :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now work on unordered categoricals also (:issue:`21667`) -- Added test to assert roundtripping to parquet with :func:`DataFrame.to_parquet` or :func:`read_parquet` will preserve Categorical dtypes for string types (:issue:`27955`) -- Changed the error message in :meth:`Categorical.remove_categories` to always show the invalid removals as a set (:issue:`28669`) -- Using date accessors on a categorical dtyped :class:`Series` of datetimes was not returning an object of the - same type as if one used the :meth:`.str.` / :meth:`.dt.` on a :class:`Series` of that type. E.g. 
when accessing :meth:`Series.dt.tz_localize` on a - :class:`Categorical` with duplicate entries, the accessor was skipping duplicates (:issue:`27952`) -- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` that would give incorrect results on categorical data (:issue:`26988`) -- Bug where calling :meth:`Categorical.min` or :meth:`Categorical.max` on an empty Categorical would raise a numpy exception (:issue:`30227`) -- The following methods now also correctly output values for unobserved categories when called through ``groupby(..., observed=False)`` (:issue:`17605`) - * :meth:`core.groupby.SeriesGroupBy.count` - * :meth:`core.groupby.SeriesGroupBy.size` - * :meth:`core.groupby.SeriesGroupBy.nunique` - * :meth:`core.groupby.SeriesGroupBy.nth` - +- +- Datetimelike ^^^^^^^^^^^^ -- Bug in :meth:`Series.__setitem__` incorrectly casting ``np.timedelta64("NaT")`` to ``np.datetime64("NaT")`` when inserting into a :class:`Series` with datetime64 dtype (:issue:`27311`) -- Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`) -- Bug in ``HDFStore.__getitem__`` incorrectly reading tz attribute created in Python 2 (:issue:`26443`) -- Bug in :func:`to_datetime` where passing arrays of malformed ``str`` with errors="coerce" could incorrectly lead to raising ``ValueError`` (:issue:`28299`) -- Bug in :meth:`core.groupby.SeriesGroupBy.nunique` where ``NaT`` values were interfering with the count of unique values (:issue:`27951`) -- Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`) -- Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`) -- Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`) -- Bug in 
:meth:`Series.astype`, :meth:`Index.astype`, and :meth:`DataFrame.astype` failing to handle ``NaT`` when casting to an integer dtype (:issue:`28492`) -- Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`) -- Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`) -- Bug in :func:`core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`) -- Bug in :func:`pandas._config.localization.get_locales` where the ``locales -a`` encodes the locales list as windows-1252 (:issue:`23638`, :issue:`24760`, :issue:`27368`) -- Bug in :meth:`Series.var` failing to raise ``TypeError`` when called with ``timedelta64[ns]`` dtype (:issue:`28289`) -- Bug in :meth:`DatetimeIndex.strftime` and :meth:`Series.dt.strftime` where ``NaT`` was converted to the string ``'NaT'`` instead of ``np.nan`` (:issue:`29578`) -- Bug in masking datetime-like arrays with a boolean mask of an incorrect length not raising an ``IndexError`` (:issue:`30308`) -- Bug in :attr:`Timestamp.resolution` being a property instead of a class attribute (:issue:`29910`) -- Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`) -- Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`) -- Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`) -- Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`) -- Bug in :meth:`DataFrame.drop` 
where attempting to drop non-existent values from a DatetimeIndex would yield a confusing error message (:issue:`30399`) -- Bug in :meth:`DataFrame.append` would remove the timezone-awareness of new data (:issue:`30238`) -- Bug in :meth:`Series.cummin` and :meth:`Series.cummax` with timezone-aware dtype incorrectly dropping its timezone (:issue:`15553`) -- Bug in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` where inplace addition and subtraction did not actually operate inplace (:issue:`24115`) -- Bug in :func:`pandas.to_datetime` when called with ``Series`` storing ``IntegerArray`` raising ``TypeError`` instead of returning ``Series`` (:issue:`30050`) -- Bug in :func:`date_range` with custom business hours as ``freq`` and given number of ``periods`` (:issue:`30593`) -- Bug in :class:`PeriodIndex` comparisons with incorrectly casting integers to :class:`Period` objects, inconsistent with the :class:`Period` comparison behavior (:issue:`30722`) -- Bug in :meth:`DatetimeIndex.insert` raising a ``ValueError`` instead of a ``TypeError`` when trying to insert a timezone-aware :class:`Timestamp` into a timezone-naive :class:`DatetimeIndex`, or vice-versa (:issue:`30806`) + +- Bug in :class:`Timestamp` where constructing :class:`Timestamp` from ambiguous epoch time and calling constructor again changed :meth:`Timestamp.value` property (:issue:`24329`) +- :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` instead of ``TypeError`` (:issue:`30950`) +- Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`) Timedelta ^^^^^^^^^ -- Bug in subtracting a :class:`TimedeltaIndex` or :class:`TimedeltaArray` from a ``np.datetime64`` object (:issue:`29558`) + - - @@ 
-1035,62 +121,34 @@ Timezones Numeric ^^^^^^^ -- Bug in :meth:`DataFrame.quantile` with zero-column :class:`DataFrame` incorrectly raising (:issue:`23925`) -- :class:`DataFrame` flex inequality comparisons methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth:`DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) -- Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) -- Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29142`) -- Bug in :meth:`Series.var` not computing the right value with a nullable integer dtype series not passing through ddof argument (:issue:`29128`) -- Improved error message when using `frac` > 1 and `replace` = False (:issue:`27451`) -- Bug in numeric indexes resulted in it being possible to instantiate an :class:`Int64Index`, :class:`UInt64Index`, or :class:`Float64Index` with an invalid dtype (e.g. datetime-like) (:issue:`29539`) -- Bug in :class:`UInt64Index` precision loss while constructing from a list with values in the ``np.uint64`` range (:issue:`29526`) -- Bug in :class:`NumericIndex` construction that caused indexing to fail when integers in the ``np.uint64`` range were used (:issue:`28023`) -- Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be casted to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`) -- Bug in :meth:`Series.interpolate` when using method=`index` with an unsorted index, would previously return incorrect results. 
(:issue:`21037`) -- Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`) -- Bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` when there are duplicated indices (:issue:`30463`) -- Bug in :class:`DataFrame` cumulative operations (e.g. cumsum, cummax) incorrect casting to object-dtype (:issue:`19296`) -- Bug in dtypes being lost in ``DataFrame.__invert__`` (``~`` operator) with mixed dtypes (:issue:`31183`) -- Bug in :class:`~DataFrame.diff` losing the dtype for extension types (:issue:`30889`) -- Bug in :class:`DataFrame.diff` raising an ``IndexError`` when one of the columns was a nullable integer dtype (:issue:`30967`) +- Bug in :meth:`DataFrame.floordiv` with ``axis=0`` not treating division-by-zero like :meth:`Series.floordiv` (:issue:`31271`) +- +- Conversion ^^^^^^^^^^ - +- Bug in :class:`Series` construction from NumPy array with big-endian ``datetime64`` dtype (:issue:`29684`) - - Strings ^^^^^^^ -- Calling :meth:`Series.str.isalnum` (and other "ismethods") on an empty ``Series`` would return an ``object`` dtype instead of ``bool`` (:issue:`29624`) +- - Interval ^^^^^^^^ -- Bug in :meth:`IntervalIndex.get_indexer` where a :class:`Categorical` or :class:`CategoricalIndex` ``target`` would incorrectly raise a ``TypeError`` (:issue:`30063`) -- Bug in ``pandas.core.dtypes.cast.infer_dtype_from_scalar`` where passing ``pandas_dtype=True`` did not infer :class:`IntervalDtype` (:issue:`30337`) -- Bug in :class:`Series` constructor where constructing a ``Series`` from a ``list`` of :class:`Interval` objects resulted in ``object`` dtype instead of :class:`IntervalDtype` (:issue:`23563`) -- Bug in :class:`IntervalDtype` where the ``kind`` attribute was incorrectly set as ``None`` instead of ``"O"`` (:issue:`30568`) -- Bug in :class:`IntervalIndex`, :class:`~arrays.IntervalArray`, and :class:`Series` with interval data where 
equality comparisons were incorrect (:issue:`24112`) +- +- Indexing ^^^^^^^^ - -- Bug in assignment using a reverse slicer (:issue:`26939`) -- Bug in :meth:`DataFrame.explode` would duplicate frame in the presence of duplicates in the index (:issue:`28010`) -- Bug in reindexing a :meth:`PeriodIndex` with another type of index that contained a `Period` (:issue:`28323`) (:issue:`28337`) -- Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`) -- Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`) -- :meth:`Index.union` could fail when the left contained duplicates (:issue:`28257`) -- Bug when indexing with ``.loc`` where the index was a :class:`CategoricalIndex` with non-string categories didn't work (:issue:`17569`, :issue:`30225`) -- :meth:`Index.get_indexer_non_unique` could fail with ``TypeError`` in some cases, such as when searching for ints in a string index (:issue:`28257`) -- Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`) -- Bug in :meth:`DataFrame.loc` with incorrect dtype when setting Categorical value in 1-row DataFrame (:issue:`25495`) -- :meth:`MultiIndex.get_loc` can't find missing values when input includes missing values (:issue:`19132`) -- Bug in :meth:`Series.__setitem__` incorrectly assigning values with boolean indexer when the length of new data matches the number of ``True`` values and new data is not a ``Series`` or an ``np.array`` (:issue:`30567`) -- Bug in indexing with a :class:`PeriodIndex` incorrectly accepting integers representing years, use e.g. 
``ser.loc["2007"]`` instead of ``ser.loc[2007]`` (:issue:`30763`) +- Bug in slicing on a :class:`DatetimeIndex` with a partial-timestamp dropping high-resolution indices near the end of a year, quarter, or month (:issue:`31064`) +- Bug in :meth:`PeriodIndex.get_loc` treating higher-resolution strings differently from :meth:`PeriodIndex.get_value` (:issue:`31172`) +- Missing ^^^^^^^ @@ -1101,138 +159,60 @@ Missing MultiIndex ^^^^^^^^^^ -- Constructor for :class:`MultiIndex` verifies that the given ``sortorder`` is compatible with the actual ``lexsort_depth`` if ``verify_integrity`` parameter is ``True`` (the default) (:issue:`28735`) -- Series and MultiIndex `.drop` with `MultiIndex` raise exception if labels not in given in level (:issue:`8594`) +- - I/O ^^^ - -- :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`) -- Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`) -- Improve infinity parsing. :meth:`read_csv` now interprets ``Infinity``, ``+Infinity``, ``-Infinity`` as floating point values (:issue:`10065`) -- Bug in :meth:`DataFrame.to_csv` where values were truncated when the length of ``na_rep`` was shorter than the text input data. 
(:issue:`25099`) -- Bug in :func:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`) -- Bug in :meth:`DataFrame.to_json` where a datetime column label would not be written out in ISO format with ``orient="table"`` (:issue:`28130`) -- Bug in :func:`DataFrame.to_parquet` where writing to GCS would fail with `engine='fastparquet'` if the file did not already exist (:issue:`28326`) -- Bug in :func:`read_hdf` closing stores that it didn't open when Exceptions are raised (:issue:`28699`) -- Bug in :meth:`DataFrame.read_json` where using ``orient="index"`` would not maintain the order (:issue:`28557`) -- Bug in :meth:`DataFrame.to_html` where the length of the ``formatters`` argument was not verified (:issue:`28469`) -- Bug in :meth:`DataFrame.read_excel` with ``engine='ods'`` when ``sheet_name`` argument references a non-existent sheet (:issue:`27676`) -- Bug in :meth:`pandas.io.formats.style.Styler` formatting for floating values not displaying decimals correctly (:issue:`13257`) -- Bug in :meth:`DataFrame.to_html` when using ``formatters=`` and ``max_cols`` together. (:issue:`25955`) -- Bug in :meth:`Styler.background_gradient` not able to work with dtype ``Int64`` (:issue:`28869`) -- Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`) -- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`) -- Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`) -- :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. 
(:issue:`29857`) -- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by `record_path` would raise a ``TypeError`` (:issue:`30148`) -- :func:`read_excel` now accepts binary data (:issue:`15914`) -- Bug in :meth:`read_csv` in which encoding handling was limited to just the string `utf-16` for the C engine (:issue:`24130`) +- Bug in :meth:`read_json` where integer overflow was occurring when json contains big number strings. (:issue:`30320`) +- +- Plotting ^^^^^^^^ -- Bug in :meth:`Series.plot` not able to plot boolean values (:issue:`23719`) -- Bug in :meth:`DataFrame.plot` not able to plot when no rows (:issue:`27758`) -- Bug in :meth:`DataFrame.plot` producing incorrect legend markers when plotting multiple series on the same axis (:issue:`18222`) -- Bug in :meth:`DataFrame.plot` when ``kind='box'`` and data contains datetime or timedelta data. These types are now automatically dropped (:issue:`22799`) -- Bug in :meth:`DataFrame.plot.line` and :meth:`DataFrame.plot.area` produce wrong xlim in x-axis (:issue:`27686`, :issue:`25160`, :issue:`24784`) -- Bug where :meth:`DataFrame.boxplot` would not accept a ``color`` parameter like :meth:`DataFrame.plot.box` (:issue:`26214`) -- Bug in the ``xticks`` argument being ignored for :meth:`DataFrame.plot.bar` (:issue:`14119`) -- :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`) -- :meth:`DataFrame.plot` now allow a ``backend`` keyword argument to allow changing between backends in one session (:issue:`28619`). -- Bug in color validation incorrectly raising for non-color styles (:issue:`29122`). -- Allow :meth:`DataFrame.plot.scatter` to plot ``objects`` and ``datetime`` type data (:issue:`18755`, :issue:`30391`) -- Bug in :meth:`DataFrame.hist`, ``xrot=0`` does not work with ``by`` and subplots (:issue:`30288`). 
+- :func:`.plot` for line/bar now accepts color by dictionary (:issue:`8193`). +- Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug in :meth:`core.groupby.DataFrameGroupBy.apply` only showing output from a single group when function returns an :class:`Index` (:issue:`28652`) -- Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`) -- Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty :class:`Series` or :class:`DataFrame` (:issue:`28427`) -- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`) -- Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue:`15584`). -- Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue:`19248`). -- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) -- Bug in :meth:`core.groupby.DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`) -- Bug in :meth:`DataFrame.groupby` losing column name information when grouping by a categorical column (:issue:`28787`) -- Remove error raised due to duplicated input functions in named aggregation in :meth:`DataFrame.groupby` and :meth:`Series.groupby`. Previously error will be raised if the same function is applied on the same column and now it is allowed if new assigned names are different. 
(:issue:`28426`) -- :meth:`core.groupby.SeriesGroupBy.value_counts` will be able to handle the case even when the :class:`Grouper` makes empty groups (:issue:`28479`) -- Bug in :meth:`core.window.rolling.Rolling.quantile` ignoring ``interpolation`` keyword argument when used within a groupby (:issue:`28779`) -- Bug in :meth:`DataFrame.groupby` where ``any``, ``all``, ``nunique`` and transform functions would incorrectly handle duplicate column labels (:issue:`21668`) -- Bug in :meth:`core.groupby.DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`) -- Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`) -- Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) -- Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) -- Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`) -- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument where ``min_periods`` was ignored (:issue:`26996`) +- Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate passed in objects (:issue:`30667`) +- Bug in :meth:`DataFrameGroupby.transform` produces incorrect result with transformation functions (:issue:`30918`) Reshaping ^^^^^^^^^ -- Bug in :meth:`DataFrame.apply` that caused incorrect output with empty :class:`DataFrame` (:issue:`28202`, :issue:`21959`) -- Bug in :meth:`DataFrame.stack` not handling non-unique indexes correctly when creating MultiIndex (:issue:`28301`) -- Bug in :meth:`pivot_table` not returning correct type ``float`` when ``margins=True`` and ``aggfunc='mean'`` (:issue:`24893`) -- Bug :func:`merge_asof` could not use :class:`datetime.timedelta` for ``tolerance`` kwarg 
(:issue:`28098`) -- Bug in :func:`merge`, did not append suffixes correctly with MultiIndex (:issue:`28518`) -- :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`) -- Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`). -- Better error message in :func:`get_dummies` when `columns` isn't a list-like value (:issue:`28383`) -- Bug in :meth:`Index.join` that caused infinite recursion error for mismatched ``MultiIndex`` name orders. (:issue:`25760`, :issue:`28956`) -- Bug :meth:`Series.pct_change` where supplying an anchored frequency would throw a ``ValueError`` (:issue:`28664`) -- Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`) -- Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`) -- Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`) -- Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`) -- Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) -- Improved error message and docstring in :func:`cut` and :func:`qcut` when `labels=True` (:issue:`13318`) -- Bug in missing `fill_na` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`) +- Bug affecting all numeric and boolean reduction methods not returning subclassed data type. 
(:issue:`25596`) +- Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`) +- Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` can take tuple names in MultiIndexed data (:issue:`19966`) +- Bug in :meth:`DataFrame.pivot_table` when ``margins`` is ``True`` and only ``columns`` is defined (:issue:`31016`) +- Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`) +- Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`) +- Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`) Sparse ^^^^^^ -- Bug in :class:`SparseDataFrame` arithmetic operations incorrectly casting inputs to float (:issue:`28107`) -- Bug in ``DataFrame.sparse`` returning a ``Series`` when there was a column named ``sparse`` rather than the accessor (:issue:`30758`) -- Fixed :meth:`operator.xor` with a boolean-dtype ``SparseArray``. Now returns a sparse result, rather than object dtype (:issue:`31025`) + +- +- ExtensionArray ^^^^^^^^^^^^^^ -- Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`). 
-- Bug where nullable integers could not be compared to strings (:issue:`28930`) -- Bug where :class:`DataFrame` constructor raised ``ValueError`` with list-like data and ``dtype`` specified (:issue:`30280`) -- Bug in dtype being lost in ``__invert__`` (``~`` operator) for extension-array backed ``Series`` and ``DataFrame`` (:issue:`23087`) +- +- Other ^^^^^ -- Trying to set the ``display.precision``, ``display.max_rows`` or ``display.max_columns`` using :meth:`set_option` to anything but a ``None`` or a positive int will raise a ``ValueError`` (:issue:`23348`) -- Using :meth:`DataFrame.replace` with overlapping keys in a nested dictionary will no longer raise, now matching the behavior of a flat dictionary (:issue:`27660`) -- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`) -- Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`) -- :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`) -- Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`) -- Backtick quoting in :meth:`DataFrame.query` and :meth:`DataFrame.eval` can now also be used to use invalid identifiers like names that start with a digit, are python keywords, or are using single character operators. 
(:issue:`27017`) -- Bug in ``pd.core.util.hashing.hash_pandas_object`` where arrays containing tuples were incorrectly treated as non-hashable (:issue:`28969`) -- Bug in :meth:`DataFrame.append` that raised ``IndexError`` when appending with empty list (:issue:`28769`) -- Fix :class:`AbstractHolidayCalendar` to return correct results for - years after 2030 (now goes up to 2200) (:issue:`27790`) -- Fixed :class:`~arrays.IntegerArray` returning ``inf`` rather than ``NaN`` for operations dividing by ``0`` (:issue:`27398`) -- Fixed ``pow`` operations for :class:`~arrays.IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`) -- Bug in :meth:`Series.count` raises if use_inf_as_na is enabled (:issue:`29478`) -- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:`29069`) -- Bug in :class:`DataFrame` constructor when passing a 2D ``ndarray`` and an extension dtype (:issue:`12513`) -- Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`) -- Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`) -- Handle nested NumPy ``object`` arrays in :func:`testing.assert_series_equal` for ExtensionArray implementations (:issue:`30841`) -- Bug in :class:`Index` constructor incorrectly allowing 2-dimensional input arrays (:issue:`13601`, :issue:`27125`) +- Appending a dictionary to a :class:`DataFrame` without passing ``ignore_index=True`` will raise ``TypeError: Can only append a dict if ignore_index=True`` + instead of ``TypeError: Can only append a Series if ignore_index=True or if the Series has a name`` (:issue:`30871`) +- .. --------------------------------------------------------------------------- -.. _whatsnew_100.contributors: +.. _whatsnew_110.contributors: Contributors ~~~~~~~~~~~~ - -.. 
contributors:: v0.25.3..v1.0.0rc0 From ddf890f51e691ba5db00db99105db0c39360d4d7 Mon Sep 17 00:00:00 2001 From: rjfs Date: Mon, 27 Jan 2020 23:11:28 +0100 Subject: [PATCH 10/15] Update v1.0.0.rst --- doc/source/whatsnew/v1.0.0.rst | 1179 +++++++++++++++++++++++++++++--- 1 file changed, 1099 insertions(+), 80 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 920919755dc23..ada82987921ec 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1,11 +1,35 @@ -.. _whatsnew_110: +.. _whatsnew_100: -What's new in 1.1.0 (??) +What's new in 1.0.0 (??) ------------------------ -These are the changes in pandas 1.1.0. See :ref:`release` for a full changelog +These are the changes in pandas 1.0.0. See :ref:`release` for a full changelog including other versions of pandas. +.. note:: + + The pandas 1.0 release removed a lot of functionality that was deprecated + in previous releases (see :ref:`below ` + for an overview). It is recommended to first upgrade to pandas 0.25 and to + ensure your code is working without warnings, before upgrading to pandas + 1.0. + + +New Deprecation Policy +~~~~~~~~~~~~~~~~~~~~~~ + +Starting with Pandas 1.0.0, pandas will adopt a variant of `SemVer`_ to +version releases. Briefly, + +* Deprecations will be introduced in minor releases (e.g. 1.1.0, 1.2.0, 2.1.0, ...) +* Deprecations will be enforced in major releases (e.g. 1.0.0, 2.0.0, 3.0.0, ...) +* API-breaking changes will be made only in major releases (except for experimental features) + +See :ref:`policies.version` for more. + +.. _2019 Pandas User Survey: http://dev.pandas.io/pandas-blog/2019-pandas-user-survey.html +.. _SemVer: https://semver.org + {{ header }} .. --------------------------------------------------------------------------- @@ -13,81 +37,928 @@ including other versions of pandas. Enhancements ~~~~~~~~~~~~ -.. _whatsnew_110.period_index_partial_string_slicing: +.. 
_whatsnew_100.NA: + +Experimental ``NA`` scalar to denote missing values +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A new ``pd.NA`` value (singleton) is introduced to represent scalar missing +values. Up to now, pandas used several values to represent missing data: ``np.nan`` is used for this for float data, ``np.nan`` or +``None`` for object-dtype data and ``pd.NaT`` for datetime-like data. The +goal of ``pd.NA`` is to provide a "missing" indicator that can be used +consistently across data types. ``pd.NA`` is currently used by the nullable integer and boolean +data types and the new string data type (:issue:`28095`). + +.. warning:: + + Experimental: the behaviour of ``pd.NA`` can still change without warning. + +For example, creating a Series using the nullable integer dtype: + +.. ipython:: python + + s = pd.Series([1, 2, None], dtype="Int64") + s + s[2] + +Compared to ``np.nan``, ``pd.NA`` behaves differently in certain operations. +In addition to arithmetic operations, ``pd.NA`` also propagates as "missing" +or "unknown" in comparison operations: + +.. ipython:: python + + np.nan > 1 + pd.NA > 1 + +For logical operations, ``pd.NA`` follows the rules of the +`three-valued logic `__ (or +*Kleene logic*). For example: + +.. ipython:: python + + pd.NA | True + +For more, see :ref:`NA section ` in the user guide on missing +data. + + +.. _whatsnew_100.string: + +Dedicated string data type +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've added :class:`StringDtype`, an extension type dedicated to string data. +Previously, strings were typically stored in object-dtype NumPy arrays. (:issue:`29975`) + +.. warning:: + + ``StringDtype`` is currently considered experimental. The implementation + and parts of the API may change without warning. + +The ``'string'`` extension type solves several issues with object-dtype NumPy arrays: + +1. You can accidentally store a *mixture* of strings and non-strings in an + ``object`` dtype array. 
A ``StringArray`` can only store strings. +2. ``object`` dtype breaks dtype-specific operations like :meth:`DataFrame.select_dtypes`. + There isn't a clear way to select *just* text while excluding non-text, + but still object-dtype columns. +3. When reading code, the contents of an ``object`` dtype array are less clear + than ``string``. + + +.. ipython:: python + + pd.Series(['abc', None, 'def'], dtype=pd.StringDtype()) + +You can use the alias ``"string"`` as well. + +.. ipython:: python + + s = pd.Series(['abc', None, 'def'], dtype="string") + s + +The usual string accessor methods work. Where appropriate, the return type +of the Series or columns of a DataFrame will also have string dtype. + +.. ipython:: python + + s.str.upper() + s.str.split('b', expand=True).dtypes + +String accessor methods returning integers will return a value with :class:`Int64Dtype`. + +.. ipython:: python + + s.str.count("a") + +We recommend explicitly using the ``string`` data type when working with strings. +See :ref:`text.types` for more. + +.. _whatsnew_100.boolean: + +Boolean data type with missing values support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've added :class:`BooleanDtype` / :class:`~arrays.BooleanArray`, an extension +type dedicated to boolean data that can hold missing values. With the default +``bool`` data type, based on a bool-dtype NumPy array, a column can only hold +``True`` or ``False``, and not missing values. This new :class:`~arrays.BooleanArray` +can store missing values as well by keeping track of this in a separate mask. +(:issue:`29555`, :issue:`30095`, :issue:`31131`) + +.. ipython:: python + + pd.Series([True, False, None], dtype=pd.BooleanDtype()) + +You can use the alias ``"boolean"`` as well. + +.. ipython:: python + + s = pd.Series([True, False, None], dtype="boolean") + s + +..
_whatsnew_100.convert_dtypes: + +``convert_dtypes`` method to ease use of supported extension dtypes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In order to encourage use of the extension dtypes ``StringDtype``, +``BooleanDtype``, ``Int64Dtype``, ``Int32Dtype``, etc., that support ``pd.NA``, the +methods :meth:`DataFrame.convert_dtypes` and :meth:`Series.convert_dtypes` +have been introduced. (:issue:`29752`) (:issue:`30929`) -Nonmonotonic PeriodIndex Partial String Slicing -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Example: -:class:`PeriodIndex` now supports partial string slicing for non-monotonic indexes, mirroring :class:`DatetimeIndex` behavior (:issue:`31096`) +.. ipython:: python -For example: + df = pd.DataFrame({'x': ['abc', None, 'def'], + 'y': [1, 2, np.nan], + 'z': [True, False, True]}) + df + df.dtypes .. ipython:: python - dti = pd.date_range("2014-01-01", periods=30, freq="30D") - pi = dti.to_period("D") - ser_monotonic = pd.Series(np.arange(30), index=pi) - shuffler = list(range(0, 30, 2)) + list(range(1, 31, 2)) - ser = ser_monotonic[shuffler] - ser + converted = df.convert_dtypes() + converted + converted.dtypes + +This is especially useful after reading in data using readers such as :func:`read_csv` +and :func:`read_excel`. +See :ref:`here ` for a description. + +.. _whatsnew_100.numba_rolling_apply: + +Using Numba in ``rolling.apply`` and ``expanding.apply`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've added an ``engine`` keyword to :meth:`~core.window.rolling.Rolling.apply` and :meth:`~core.window.expanding.Expanding.apply` +that allows the user to execute the routine using `Numba `__ instead of Cython. +Using the Numba engine can yield significant performance gains if the apply function can operate on numpy arrays and +the data set is larger (1 million rows or greater). For more details, see +:ref:`rolling apply documentation ` (:issue:`28987`, :issue:`30936`) + +.. 
_whatsnew_100.custom_window: + +Defining custom windows for rolling operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've added a :func:`pandas.api.indexers.BaseIndexer` class that allows users to define how +window bounds are created during ``rolling`` operations. Users can define their own ``get_window_bounds`` +method on a :func:`pandas.api.indexers.BaseIndexer` subclass that will generate the start and end +indices used for each window during the rolling aggregation. For more details and example usage, see +the :ref:`custom window rolling documentation ` + +.. _whatsnew_100.to_markdown: + +Converting to Markdown +^^^^^^^^^^^^^^^^^^^^^^ + +We've added :meth:`~DataFrame.to_markdown` for creating a markdown table (:issue:`11052`) .. ipython:: python - ser["2014"] - ser.loc["May 2015"] + df = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}, index=['a', 'a', 'b']) + print(df.to_markdown()) -.. _whatsnew_110.enhancements.other: +.. _whatsnew_100.enhancements.other: Other enhancements ^^^^^^^^^^^^^^^^^^ -- :class:`Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`) -- -- +- :meth:`DataFrame.to_string` added the ``max_colwidth`` parameter to control when wide columns are truncated (:issue:`9784`) +- Added the ``na_value`` argument to :meth:`Series.to_numpy`, :meth:`Index.to_numpy` and :meth:`DataFrame.to_numpy` to control the value used for missing data (:issue:`30322`) +- :meth:`MultiIndex.from_product` infers level names from inputs if not explicitly provided (:issue:`27292`) +- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`) +- The :ref:`integer dtype ` with support for missing values and the + new :ref:`string dtype ` can now be converted to ``pyarrow`` (>= + 0.15.0), which means that it is supported in writing to the Parquet file + format when using the ``pyarrow`` engine. 
It is currently not yet supported + when converting back to pandas, so it will become an integer or float + (depending on the presence of missing data) or object dtype column. (:issue:`28368`) +- :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`) +- :meth:`read_stata` can read Stata 119 dta files. (:issue:`28250`) +- Implemented :meth:`pandas.core.window.Window.var` and :meth:`pandas.core.window.Window.std` functions (:issue:`26597`) +- Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`) +- Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`) +- :meth:`Styler.background_gradient` now accepts ``vmin`` and ``vmax`` arguments (:issue:`12145`) +- :meth:`Styler.format` added the ``na_rep`` parameter to help format the missing values (:issue:`21527`, :issue:`28358`) +- Roundtripping DataFrames with nullable integer, string and period data types to parquet + (:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine + now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`). +- :func:`read_excel` now can read binary Excel (``.xlsb``) files by passing ``engine='pyxlsb'``. For more details and example usage, see the :ref:`Binary Excel files documentation `. Closes :issue:`8540`. +- The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`) +- :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`) +- :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. 
(:issue:`30270`) +- DataFrame constructor preserves ``ExtensionArray`` dtype with ``ExtensionArray`` (:issue:`11363`) +- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`) +- :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` have gained ``ignore_index`` keyword to reset index (:issue:`30114`) +- :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`) +- Added new writer for exporting Stata dta files in versions 118 and 119, ``StataWriterUTF8``. These file formats support exporting strings containing Unicode characters. Format 119 supports data sets with more than 32,767 variables (:issue:`23573`, :issue:`30959`) +- :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`) +- Added an experimental :attr:`~DataFrame.attrs` for storing global metadata about a dataset (:issue:`29062`) +- :meth:`Timestamp.fromisocalendar` is now compatible with Python 3.8 and above (:issue:`28115`) +- :meth:`DataFrame.to_pickle` and :func:`read_pickle` now accept a URL (:issue:`30163`) + + +Build Changes +^^^^^^^^^^^^^ + +Pandas has added a `pyproject.toml <https://www.python.org/dev/peps/pep-0517/#source-trees>`_ file and will no longer include +cythonized files in the source distribution uploaded to PyPI (:issue:`28341`, :issue:`20775`). If you're installing +a built distribution (wheel) or via conda, this shouldn't have any effect on you. If you're building pandas from +source, you should no longer need to install Cython into your build environment before calling ``pip install pandas``. .. --------------------------------------------------------------------------- -..
_whatsnew_110.api.other: - -Other API changes -^^^^^^^^^^^^^^^^^ - -- :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last`` - will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`) -- +.. _whatsnew_100.api_breaking: Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- :meth:`DataFrame.swaplevels` now raises a ``TypeError`` if the axis is not a :class:`MultiIndex`. - Previously a ``AttributeError`` was raised (:issue:`31126`) +.. _whatsnew_100.api_breaking.MultiIndex._names: + +Avoid using names from ``MultiIndex.levels`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As part of a larger refactor to :class:`MultiIndex` the level names are now +stored separately from the levels (:issue:`27242`). We recommend using +:attr:`MultiIndex.names` to access the names, and :meth:`Index.set_names` +to update the names. + +For backwards compatibility, you can still *access* the names via the levels. + +.. ipython:: python + + mi = pd.MultiIndex.from_product([[1, 2], ['a', 'b']], names=['x', 'y']) + mi.levels[0].name + +However, it is no longer possible to *update* the names of the ``MultiIndex`` +via the level. + +.. ipython:: python + :okexcept: + + mi.levels[0].name = "new name" + mi.names + +To update, use ``MultiIndex.set_names``, which returns a new ``MultiIndex``. + +.. ipython:: python + + mi2 = mi.set_names("new name", level=0) + mi2.names + +New repr for :class:`~pandas.arrays.IntervalArray` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- :class:`pandas.arrays.IntervalArray` adopts a new ``__repr__`` in accordance with other array classes (:issue:`25022`) + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: pd.arrays.IntervalArray.from_tuples([(0, 1), (2, 3)]) + Out[2]: + IntervalArray([(0, 1], (2, 3]], + closed='right', + dtype='interval[int64]') + +*pandas 1.0.0* + +.. 
ipython:: python + + pd.arrays.IntervalArray.from_tuples([(0, 1), (2, 3)]) + +``DataFrame.rename`` now only accepts one positional argument +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- :meth:`DataFrame.rename` would previously accept positional arguments that would lead + to ambiguous or undefined behavior. From pandas 1.0, only the very first argument, which + maps labels to their new names along the default axis, is allowed to be passed by position + (:issue:`29136`). + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: df = pd.DataFrame([[1]]) + In [2]: df.rename({0: 1}, {0: 2}) + FutureWarning: ...Use named arguments to resolve ambiguity... + Out[2]: + 2 + 1 1 + +*pandas 1.0.0* + +.. ipython:: python + :okexcept: + + df.rename({0: 1}, {0: 2}) + +Note that errors will now be raised when conflicting or potentially ambiguous arguments are provided. + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: df.rename({0: 1}, index={0: 2}) + Out[1]: + 0 + 1 1 + + In [2]: df.rename(mapper={0: 1}, index={0: 2}) + Out[2]: + 0 + 2 1 + +*pandas 1.0.0* + +.. ipython:: python + :okexcept: + + df.rename({0: 1}, index={0: 2}) + df.rename(mapper={0: 1}, index={0: 2}) + +You can still change the axis along which the first positional argument is applied by +supplying the ``axis`` keyword argument. + +.. ipython:: python + + df.rename({0: 1}) + df.rename({0: 1}, axis=1) + +If you would like to update both the index and column labels, be sure to use the respective +keywords. + +.. ipython:: python + + df.rename(index={0: 1}, columns={0: 2}) + +Extended verbose info output for :class:`~pandas.DataFrame` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- :meth:`DataFrame.info` now shows line numbers for the columns summary (:issue:`17304`) + +*pandas 0.25.x* + +.. code-block:: python + + >>> df = pd.DataFrame({"int_col": [1, 2, 3], + ... "text_col": ["a", "b", "c"], + ... 
"float_col": [0.0, 0.1, 0.2]}) + >>> df.info(verbose=True) + + RangeIndex: 3 entries, 0 to 2 + Data columns (total 3 columns): + int_col 3 non-null int64 + text_col 3 non-null object + float_col 3 non-null float64 + dtypes: float64(1), int64(1), object(1) + memory usage: 152.0+ bytes + +*pandas 1.0.0* + +.. ipython:: python + + df = pd.DataFrame({"int_col": [1, 2, 3], + "text_col": ["a", "b", "c"], + "float_col": [0.0, 0.1, 0.2]}) + df.info(verbose=True) + +:meth:`pandas.array` inference changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`pandas.array` now infers pandas' new extension types in several cases (:issue:`29791`): + +1. String data (including missing values) now returns a :class:`arrays.StringArray`. +2. Integer data (including missing values) now returns a :class:`arrays.IntegerArray`. +3. Boolean data (including missing values) now returns the new :class:`arrays.BooleanArray` + +*pandas 0.25.x* + +.. code-block:: python + + >>> pd.array(["a", None]) + + ['a', None] + Length: 2, dtype: object + + >>> pd.array([1, None]) + + [1, None] + Length: 2, dtype: object + + +*pandas 1.0.0* + +.. ipython:: python + + pd.array(["a", None]) + pd.array([1, None]) + +As a reminder, you can specify the ``dtype`` to disable all inference. + +:class:`arrays.IntegerArray` now uses :attr:`pandas.NA` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`arrays.IntegerArray` now uses :attr:`pandas.NA` rather than +:attr:`numpy.nan` as its missing value marker (:issue:`29964`). + +*pandas 0.25.x* + +.. code-block:: python + + >>> a = pd.array([1, 2, None], dtype="Int64") + >>> a + + [1, 2, NaN] + Length: 3, dtype: Int64 + + >>> a[2] + nan + +*pandas 1.0.0* + +.. ipython:: python + + a = pd.array([1, 2, None], dtype="Int64") + a + a[2] + +This has a few API-breaking consequences. + +**Converting to a NumPy ndarray** + +When converting to a NumPy array missing values will be ``pd.NA``, which cannot +be converted to a float. 
So calling ``np.asarray(integer_array, dtype="float")`` +will now raise. + +*pandas 0.25.x* + +.. code-block:: python + + >>> np.asarray(a, dtype="float") + array([ 1., 2., nan]) + +*pandas 1.0.0* + +.. ipython:: python + :okexcept: + + np.asarray(a, dtype="float") + +Use :meth:`arrays.IntegerArray.to_numpy` with an explicit ``na_value`` instead. + +.. ipython:: python + + a.to_numpy(dtype="float", na_value=np.nan) + +**Reductions can return ``pd.NA``** + +When performing a reduction such as a sum with ``skipna=False``, the result +will now be ``pd.NA`` instead of ``np.nan`` in the presence of missing values +(:issue:`30958`). + +*pandas 0.25.x* + +.. code-block:: python + + >>> pd.Series(a).sum(skipna=False) + nan + +*pandas 1.0.0* + +.. ipython:: python + + pd.Series(a).sum(skipna=False) + +**value_counts returns a nullable integer dtype** + +:meth:`Series.value_counts` with a nullable integer dtype now returns a nullable +integer dtype for the values. + +*pandas 0.25.x* + +.. code-block:: python + + >>> pd.Series([2, 1, 1, None], dtype="Int64").value_counts().dtype + dtype('int64') + +*pandas 1.0.0* + +.. ipython:: python + + pd.Series([2, 1, 1, None], dtype="Int64").value_counts().dtype + +See :ref:`missing_data.NA` for more on the differences between :attr:`pandas.NA` +and :attr:`numpy.nan`. + +:class:`arrays.IntegerArray` comparisons return :class:`arrays.BooleanArray` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Comparison operations on a :class:`arrays.IntegerArray` now return a +:class:`arrays.BooleanArray` rather than a NumPy array (:issue:`29964`). + +*pandas 0.25.x* + +.. code-block:: python + + >>> a = pd.array([1, 2, None], dtype="Int64") + >>> a + <IntegerArray> + [1, 2, NaN] + Length: 3, dtype: Int64 + + >>> a > 1 + array([False, True, False]) + +*pandas 1.0.0* + +..
ipython:: python + + a = pd.array([1, 2, None], dtype="Int64") + a > 1 + +Note that missing values now propagate, rather than always comparing unequal +like :attr:`numpy.nan`. See :ref:`missing_data.NA` for more. + +By default :meth:`Categorical.min` now returns the minimum instead of np.nan +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When :class:`Categorical` contains ``np.nan``, +:meth:`Categorical.min` no longer returns ``np.nan`` by default (``skipna=True``) (:issue:`25303`) + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: pd.Categorical([1, 2, np.nan], ordered=True).min() + Out[1]: nan + + +*pandas 1.0.0* + +.. ipython:: python + + pd.Categorical([1, 2, np.nan], ordered=True).min() + + +Default dtype of empty :class:`pandas.Series` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Initialising an empty :class:`pandas.Series` without specifying a dtype will now raise a ``DeprecationWarning`` +(:issue:`17261`). The default dtype will change from ``float64`` to ``object`` in future releases so that it is +consistent with the behaviour of :class:`DataFrame` and :class:`Index`. + +*pandas 1.0.0* + +.. code-block:: ipython + + In [1]: pd.Series() + Out[1]: + DeprecationWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning. + Series([], dtype: float64) + +.. _whatsnew_100.api_breaking.python: + +Increased minimum version for Python +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas 1.0.0 supports Python 3.6.1 and higher (:issue:`29212`). + +.. _whatsnew_100.api_breaking.deps: + +Increased minimum versions for dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Some minimum supported versions of dependencies were updated (:issue:`29766`, :issue:`29723`).
+If installed, we now require: + ++-----------------+-----------------+----------+---------+ +| Package | Minimum Version | Required | Changed | ++=================+=================+==========+=========+ +| numpy | 1.13.3 | X | | ++-----------------+-----------------+----------+---------+ +| pytz | 2015.4 | X | | ++-----------------+-----------------+----------+---------+ +| python-dateutil | 2.6.1 | X | | ++-----------------+-----------------+----------+---------+ +| bottleneck | 1.2.1 | | | ++-----------------+-----------------+----------+---------+ +| numexpr | 2.6.2 | | | ++-----------------+-----------------+----------+---------+ +| pytest (dev) | 4.0.2 | | | ++-----------------+-----------------+----------+---------+ + +For `optional libraries `_ the general recommendation is to use the latest version. +The following table lists the lowest version per library that is currently being tested throughout the development of pandas. +Optional libraries below the lowest tested version may still work, but are not considered supported. 
+ ++-----------------+-----------------+---------+ +| Package | Minimum Version | Changed | ++=================+=================+=========+ +| beautifulsoup4 | 4.6.0 | | ++-----------------+-----------------+---------+ +| fastparquet | 0.3.2 | X | ++-----------------+-----------------+---------+ +| gcsfs | 0.2.2 | | ++-----------------+-----------------+---------+ +| lxml | 3.8.0 | | ++-----------------+-----------------+---------+ +| matplotlib | 2.2.2 | | ++-----------------+-----------------+---------+ +| numba | 0.46.0 | X | ++-----------------+-----------------+---------+ +| openpyxl | 2.5.7 | X | ++-----------------+-----------------+---------+ +| pyarrow | 0.13.0 | X | ++-----------------+-----------------+---------+ +| pymysql | 0.7.1 | | ++-----------------+-----------------+---------+ +| pytables | 3.4.2 | | ++-----------------+-----------------+---------+ +| s3fs | 0.3.0 | X | ++-----------------+-----------------+---------+ +| scipy | 0.19.0 | | ++-----------------+-----------------+---------+ +| sqlalchemy | 1.1.4 | | ++-----------------+-----------------+---------+ +| xarray | 0.8.2 | | ++-----------------+-----------------+---------+ +| xlrd | 1.1.0 | | ++-----------------+-----------------+---------+ +| xlsxwriter | 0.9.8 | | ++-----------------+-----------------+---------+ +| xlwt | 1.2.0 | | ++-----------------+-----------------+---------+ + +See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. + +.. 
_whatsnew_100.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- Bumped the minimum supported version of ``s3fs`` from 0.0.8 to 0.3.0 (:issue:`28616`) +- :meth:`core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`) +- :meth:`pandas.api.types.infer_dtype` will now return "integer-na" for a mix of integers and ``np.nan`` (:issue:`27283`) +- :meth:`MultiIndex.from_arrays` will no longer infer names from arrays if ``names=None`` is explicitly provided (:issue:`27292`) +- In order to improve tab-completion, Pandas does not include most deprecated attributes when introspecting a pandas object using ``dir`` (e.g. ``dir(df)``). + To see which attributes are excluded, see an object's ``_deprecations`` attribute, for example ``pd.DataFrame._deprecations`` (:issue:`28805`). +- The returned dtype of :func:`pd.unique` now matches the input dtype. (:issue:`27874`) +- Changed the default configuration value for ``options.plotting.matplotlib.register_converters`` from ``True`` to ``"auto"`` (:issue:`18720`). + Now, pandas custom formatters will only be applied to plots created by pandas, through :meth:`~DataFrame.plot`. + Previously, pandas' formatters would be applied to all plots created *after* a :meth:`~DataFrame.plot`. + See :ref:`units registration <whatsnew_100.matplotlib_units>` for more. +- :meth:`Series.dropna` has dropped its ``**kwargs`` argument in favor of a single ``how`` parameter. + Supplying anything else than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`) +- When testing pandas, the new minimum required version of pytest is 5.0.1 (:issue:`29664`) +- :meth:`Series.str.__iter__` was deprecated and will be removed in future releases (:issue:`28277`). +- Added ``<NA>`` to the list of default NA values for :func:`read_csv` (:issue:`30821`) + +.. _whatsnew_100.api.documentation: + +Documentation Improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Added new section on :ref:`scale` (:issue:`28315`).
+- Added sub-section on :ref:`io.query_multi` for HDF5 datasets (:issue:`28791`). .. --------------------------------------------------------------------------- -.. _whatsnew_110.deprecations: +.. _whatsnew_100.deprecations: Deprecations ~~~~~~~~~~~~ -- -- +- :meth:`Series.item` and :meth:`Index.item` have been *undeprecated* (:issue:`29250`) +- ``Index.set_value`` has been deprecated. For a given index ``idx``, array ``arr``, + value in ``idx`` of ``idx_val`` and a new value of ``val``, ``idx.set_value(arr, idx_val, val)`` + is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`). +- :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`) +- :func:`eval` keyword argument "truediv" is deprecated and will be removed in a future version (:issue:`29812`) +- :meth:`DateOffset.isAnchored` and :meth:`DateOffset.onOffset` are deprecated and will be removed in a future version, use :meth:`DateOffset.is_anchored` and :meth:`DateOffset.is_on_offset` instead (:issue:`30340`) +- ``pandas.tseries.frequencies.get_offset`` is deprecated and will be removed in a future version, use ``pandas.tseries.frequencies.to_offset`` instead (:issue:`4205`) +- :meth:`Categorical.take_nd` and :meth:`CategoricalIndex.take_nd` are deprecated, use :meth:`Categorical.take` and :meth:`CategoricalIndex.take` instead (:issue:`27745`) +- The parameter ``numeric_only`` of :meth:`Categorical.min` and :meth:`Categorical.max` is deprecated and replaced with ``skipna`` (:issue:`25303`) +- The parameter ``label`` in :func:`lreshape` has been deprecated and will be removed in a future version (:issue:`29742`) +- ``pandas.core.index`` has been deprecated and will be removed in a future version, the public classes are available in the top-level namespace (:issue:`19711`) +- :func:`pandas.json_normalize` is now exposed in the top-level namespace.
+ Usage of ``json_normalize`` as ``pandas.io.json.json_normalize`` is now deprecated and + it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`). +- The ``numpy`` argument of :meth:`pandas.read_json` is deprecated (:issue:`28512`). +- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`) +- The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`) +- The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`). +- ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`) +- The parameter ``is_copy`` of :meth:`Series.take` and :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`) +- Support for multi-dimensional indexing (e.g. ``index[:, None]``) on a :class:`Index` is deprecated and will be removed in a future version, convert to a numpy array before indexing instead (:issue:`30588`) +- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`) +- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30610`) +- :class:`~DataFrame.diff` will raise a ``TypeError`` rather than implicitly losing the dtype of extension types in the future. Convert to the correct dtype before calling ``diff`` instead (:issue:`31025`) + +**Selecting Columns from a Grouped DataFrame** + +When selecting columns from a :class:`DataFrameGroupBy` object, passing individual keys (or a tuple of keys) inside single brackets is deprecated, +a list of items should be used instead. (:issue:`23566`) For example: + +.. 
code-block:: ipython + + df = pd.DataFrame({ + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": np.random.randn(8), + "C": np.random.randn(8), + }) + g = df.groupby('A') + + # single key, returns SeriesGroupBy + g['B'] + + # tuple of single key, returns SeriesGroupBy + g[('B',)] + + # tuple of multiple keys, returns DataFrameGroupBy, raises FutureWarning + g[('B', 'C')] + + # multiple keys passed directly, returns DataFrameGroupBy, raises FutureWarning + # (implicitly converts the passed strings into a single tuple) + g['B', 'C'] + + # proper way, returns DataFrameGroupBy + g[['B', 'C']] .. --------------------------------------------------------------------------- +.. _whatsnew_100.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Removed SparseSeries and SparseDataFrame** + +``SparseSeries``, ``SparseDataFrame`` and the ``DataFrame.to_sparse`` method +have been removed (:issue:`28425`). We recommend using a ``Series`` or +``DataFrame`` with sparse values instead. See :ref:`sparse.migration` for help +with migrating existing code. + +.. _whatsnew_100.matplotlib_units: + +**Matplotlib unit registration** + +Previously, pandas would register converters with matplotlib as a side effect of importing pandas (:issue:`18720`). +This changed the output of plots made via matplotlib plots after pandas was imported, even if you were using +matplotlib directly rather than :meth:`~DataFrame.plot`. + +To use pandas formatters with a matplotlib plot, specify + +.. code-block:: python + + >>> import pandas as pd + >>> pd.options.plotting.matplotlib.register_converters = True + +Note that plots created by :meth:`DataFrame.plot` and :meth:`Series.plot` *do* register the converters +automatically. The only behavior change is when plotting a date-like object via ``matplotlib.pyplot.plot`` +or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
+ +**Other removals** + +- Removed the previously deprecated keyword "index" from :func:`read_stata`, :class:`StataReader`, and :meth:`StataReader.read`, use "index_col" instead (:issue:`17328`) +- Removed ``StataReader.data`` method, use :meth:`StataReader.read` instead (:issue:`9493`) +- Removed ``pandas.plotting._matplotlib.tsplot``, use :meth:`Series.plot` instead (:issue:`19980`) +- ``pandas.tseries.converter.register`` has been moved to :func:`pandas.plotting.register_matplotlib_converters` (:issue:`18307`) +- :meth:`Series.plot` no longer accepts positional arguments, pass keyword arguments instead (:issue:`30003`) +- :meth:`DataFrame.hist` and :meth:`Series.hist` no longer allow ``figsize="default"``, specify figure size by passing a tuple instead (:issue:`30003`) +- Floordiv of integer-dtyped array by :class:`Timedelta` now raises ``TypeError`` (:issue:`21036`) +- :class:`TimedeltaIndex` and :class:`DatetimeIndex` no longer accept non-nanosecond dtype strings like "timedelta64" or "datetime64", use "timedelta64[ns]" and "datetime64[ns]" instead (:issue:`24806`) +- Changed the default "skipna" argument in :func:`pandas.api.types.infer_dtype` from ``False`` to ``True`` (:issue:`24050`) +- Removed ``Series.ix`` and ``DataFrame.ix`` (:issue:`26438`) +- Removed ``Index.summary`` (:issue:`18217`) +- Removed the previously deprecated keyword "fastpath" from the :class:`Index` constructor (:issue:`23110`) +- Removed ``Series.get_value``, ``Series.set_value``, ``DataFrame.get_value``, ``DataFrame.set_value`` (:issue:`17739`) +- Removed ``Series.compound`` and ``DataFrame.compound`` (:issue:`26405`) +- Changed the default "inplace" argument in :meth:`DataFrame.set_index` and :meth:`Series.set_axis` from ``None`` to ``False`` (:issue:`27600`) +- Removed ``Series.cat.categorical``, ``Series.cat.index``, ``Series.cat.name`` (:issue:`24751`) +- Removed the previously deprecated keyword "box" from :func:`to_datetime` and :func:`to_timedelta`; in addition these now 
always return :class:`DatetimeIndex`, :class:`TimedeltaIndex`, :class:`Index`, :class:`Series`, or :class:`DataFrame` (:issue:`24486`) +- :func:`to_timedelta`, :class:`Timedelta`, and :class:`TimedeltaIndex` no longer allow "M", "y", or "Y" for the "unit" argument (:issue:`23264`) +- Removed the previously deprecated keyword "time_rule" from (non-public) ``offsets.generate_range``, which has been moved to :func:`core.arrays._ranges.generate_range` (:issue:`24157`) +- :meth:`DataFrame.loc` or :meth:`Series.loc` with listlike indexers and missing labels will no longer reindex (:issue:`17295`) +- :meth:`DataFrame.to_excel` and :meth:`Series.to_excel` with non-existent columns will no longer reindex (:issue:`17295`) +- Removed the previously deprecated keyword "join_axes" from :func:`concat`; use ``reindex_like`` on the result instead (:issue:`22318`) +- Removed the previously deprecated keyword "by" from :meth:`DataFrame.sort_index`, use :meth:`DataFrame.sort_values` instead (:issue:`10726`) +- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`core.groupby.DataFrameGroupBy.aggregate`, :meth:`core.groupby.SeriesGroupBy.aggregate`, :meth:`core.window.rolling.Rolling.aggregate` (:issue:`18529`) +- Passing ``datetime64`` data to :class:`TimedeltaIndex` or ``timedelta64`` data to ``DatetimeIndex`` now raises ``TypeError`` (:issue:`23539`, :issue:`23937`) +- Passing ``int64`` values to :class:`DatetimeIndex` and a timezone now interprets the values as nanosecond timestamps in UTC, not wall times in the given timezone (:issue:`24559`) +- A tuple passed to :meth:`DataFrame.groupby` is now exclusively treated as a single key (:issue:`18314`) +- Removed ``Index.contains``, use ``key in index`` instead (:issue:`30103`) +- Addition and subtraction of ``int`` or integer-arrays is no longer allowed in :class:`Timestamp`, :class:`DatetimeIndex`, :class:`TimedeltaIndex`, use ``obj + n * obj.freq`` instead of ``obj + n`` 
(:issue:`22535`) +- Removed ``Series.ptp`` (:issue:`21614`) +- Removed ``Series.from_array`` (:issue:`18258`) +- Removed ``DataFrame.from_items`` (:issue:`18458`) +- Removed ``DataFrame.as_matrix``, ``Series.as_matrix`` (:issue:`18458`) +- Removed ``Series.asobject`` (:issue:`18477`) +- Removed ``DataFrame.as_blocks``, ``Series.as_blocks``, ``DataFrame.blocks``, ``Series.blocks`` (:issue:`17656`) +- :meth:`pandas.Series.str.cat` now defaults to aligning ``others``, using ``join='left'`` (:issue:`27611`) +- :meth:`pandas.Series.str.cat` does not accept list-likes *within* list-likes anymore (:issue:`27611`) +- :meth:`Series.where` with ``Categorical`` dtype (or :meth:`DataFrame.where` with ``Categorical`` column) no longer allows setting new categories (:issue:`24114`) +- Removed the previously deprecated keywords "start", "end", and "periods" from the :class:`DatetimeIndex`, :class:`TimedeltaIndex`, and :class:`PeriodIndex` constructors; use :func:`date_range`, :func:`timedelta_range`, and :func:`period_range` instead (:issue:`23919`) +- Removed the previously deprecated keyword "verify_integrity" from the :class:`DatetimeIndex` and :class:`TimedeltaIndex` constructors (:issue:`23919`) +- Removed the previously deprecated keyword "fastpath" from ``pandas.core.internals.blocks.make_block`` (:issue:`19265`) +- Removed the previously deprecated keyword "dtype" from :meth:`Block.make_block_same_class` (:issue:`19434`) +- Removed ``ExtensionArray._formatting_values``. Use :attr:`ExtensionArray._formatter` instead. 
(:issue:`23601`) +- Removed ``MultiIndex.to_hierarchical`` (:issue:`21613`) +- Removed ``MultiIndex.labels``, use :attr:`MultiIndex.codes` instead (:issue:`23752`) +- Removed the previously deprecated keyword "labels" from the :class:`MultiIndex` constructor, use "codes" instead (:issue:`23752`) +- Removed ``MultiIndex.set_labels``, use :meth:`MultiIndex.set_codes` instead (:issue:`23752`) +- Removed the previously deprecated keyword "labels" from :meth:`MultiIndex.set_codes`, :meth:`MultiIndex.copy`, :meth:`MultiIndex.drop`, use "codes" instead (:issue:`23752`) +- Removed support for legacy HDF5 formats (:issue:`29787`) +- Passing a dtype alias (e.g. 'datetime64[ns, UTC]') to :class:`DatetimeTZDtype` is no longer allowed, use :meth:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`) +- Removed the previously deprecated keyword "skip_footer" from :func:`read_excel`; use "skipfooter" instead (:issue:`18836`) +- :func:`read_excel` no longer allows an integer value for the parameter ``usecols``, instead pass a list of integers from 0 to ``usecols`` inclusive (:issue:`23635`) +- Removed the previously deprecated keyword "convert_datetime64" from :meth:`DataFrame.to_records` (:issue:`18902`) +- Removed ``IntervalIndex.from_intervals`` in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) +- Changed the default "keep_tz" argument in :meth:`DatetimeIndex.to_series` from ``None`` to ``True`` (:issue:`23739`) +- Removed ``api.types.is_period`` and ``api.types.is_datetimetz`` (:issue:`23917`) +- Ability to read pickles containing :class:`Categorical` instances created with pre-0.16 version of pandas has been removed (:issue:`27538`) +- Removed ``pandas.tseries.plotting.tsplot`` (:issue:`18627`) +- Removed the previously deprecated keywords "reduce" and "broadcast" from :meth:`DataFrame.apply` (:issue:`18577`) +- Removed the previously deprecated ``assert_raises_regex`` function in ``pandas._testing`` (:issue:`29174`) +- Removed the previously 
deprecated ``FrozenNDArray`` class in ``pandas.core.indexes.frozen`` (:issue:`29335`) +- Removed the previously deprecated keyword "nthreads" from :func:`read_feather`, use "use_threads" instead (:issue:`23053`) +- Removed ``Index.is_lexsorted_for_tuple`` (:issue:`29305`) +- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`core.groupby.DataFrameGroupBy.aggregate`, :meth:`core.groupby.SeriesGroupBy.aggregate`, :meth:`core.window.rolling.Rolling.aggregate` (:issue:`29608`) +- Removed ``Series.valid``; use :meth:`Series.dropna` instead (:issue:`18800`) +- Removed ``DataFrame.is_copy``, ``Series.is_copy`` (:issue:`18812`) +- Removed ``DataFrame.get_ftype_counts``, ``Series.get_ftype_counts`` (:issue:`18243`) +- Removed ``DataFrame.ftypes``, ``Series.ftypes``, ``Series.ftype`` (:issue:`26744`) +- Removed ``Index.get_duplicates``, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`) +- Removed ``Series.clip_upper``, ``Series.clip_lower``, ``DataFrame.clip_upper``, ``DataFrame.clip_lower`` (:issue:`24203`) +- Removed the ability to alter :attr:`DatetimeIndex.freq`, :attr:`TimedeltaIndex.freq`, or :attr:`PeriodIndex.freq` (:issue:`20772`) +- Removed ``DatetimeIndex.offset`` (:issue:`20730`) +- Removed ``DatetimeIndex.asobject``, ``TimedeltaIndex.asobject``, ``PeriodIndex.asobject``, use ``astype(object)`` instead (:issue:`29801`) +- Removed the previously deprecated keyword "order" from :func:`factorize` (:issue:`19751`) +- Removed the previously deprecated keyword "encoding" from :func:`read_stata` and :meth:`DataFrame.to_stata` (:issue:`21400`) +- Changed the default "sort" argument in :func:`concat` from ``None`` to ``False`` (:issue:`20613`) +- Removed the previously deprecated keyword "raise_conflict" from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`) +- Removed the previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, 
use "periods" instead (:issue:`22458`) +- Removed the previously deprecated keywords "how", "fill_method", and "limit" from :meth:`DataFrame.resample` (:issue:`30139`) +- Passing an integer to :meth:`Series.fillna` or :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype now raises ``TypeError`` (:issue:`24694`) +- Passing multiple axes to :meth:`DataFrame.dropna` is no longer supported (:issue:`20995`) +- Removed ``Series.nonzero``, use ``to_numpy().nonzero()`` instead (:issue:`24048`) +- Passing floating dtype ``codes`` to :meth:`Categorical.from_codes` is no longer supported, pass ``codes.astype(np.int64)`` instead (:issue:`21775`) +- Removed the previously deprecated keyword "pat" from :meth:`Series.str.partition` and :meth:`Series.str.rpartition`, use "sep" instead (:issue:`23767`) +- Removed ``Series.put`` (:issue:`27106`) +- Removed ``Series.real``, ``Series.imag`` (:issue:`27106`) +- Removed ``Series.to_dense``, ``DataFrame.to_dense`` (:issue:`26684`) +- Removed ``Index.dtype_str``, use ``str(index.dtype)`` instead (:issue:`27106`) +- :meth:`Categorical.ravel` returns a :class:`Categorical` instead of a ``ndarray`` (:issue:`27199`) +- The 'outer' method on Numpy ufuncs, e.g. 
``np.subtract.outer`` operating on :class:`Series` objects is no longer supported, and will raise ``NotImplementedError`` (:issue:`27198`) +- Removed ``Series.get_dtype_counts`` and ``DataFrame.get_dtype_counts`` (:issue:`27145`) +- Changed the default "fill_value" argument in :meth:`Categorical.take` from ``True`` to ``False`` (:issue:`20841`) +- Changed the default value for the `raw` argument in :func:`Series.rolling().apply() <pandas.core.window.Rolling.apply>`, :func:`DataFrame.rolling().apply() <pandas.core.window.Rolling.apply>`, :func:`Series.expanding().apply() <pandas.core.window.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <pandas.core.window.Expanding.apply>` from ``None`` to ``False`` (:issue:`20584`) +- Removed deprecated behavior of :meth:`Series.argmin` and :meth:`Series.argmax`, use :meth:`Series.idxmin` and :meth:`Series.idxmax` for the old behavior (:issue:`16955`) +- Passing a tz-aware ``datetime.datetime`` or :class:`Timestamp` into the :class:`Timestamp` constructor with the ``tz`` argument now raises a ``ValueError`` (:issue:`23621`) +- Removed ``Series.base``, ``Index.base``, ``Categorical.base``, ``Series.flags``, ``Index.flags``, ``PeriodArray.flags``, ``Series.strides``, ``Index.strides``, ``Series.itemsize``, ``Index.itemsize``, ``Series.data``, ``Index.data`` (:issue:`20721`) +- Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``, for the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`) +- Removed ``Timestamp.weekday_name``, ``DatetimeIndex.weekday_name``, and ``Series.dt.weekday_name`` (:issue:`18164`) +- Removed the previously deprecated keyword "errors" in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`) +- Changed the default "ordered" argument in :class:`CategoricalDtype` from ``None`` to ``False`` (:issue:`26336`) +- :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` now require "labels" as the first argument and "axis" as an optional named parameter (:issue:`30089`) +- Removed ``to_msgpack``, 
``read_msgpack``, ``DataFrame.to_msgpack``, ``Series.to_msgpack`` (:issue:`27103`) +- Removed ``Series.compress`` (:issue:`21930`) +- Removed the previously deprecated keyword "fill_value" from :meth:`Categorical.fillna`, use "value" instead (:issue:`19269`) +- Removed the previously deprecated keyword "data" from :func:`andrews_curves`, use "frame" instead (:issue:`6956`) +- Removed the previously deprecated keyword "data" from :func:`parallel_coordinates`, use "frame" instead (:issue:`6956`) +- Removed the previously deprecated keyword "colors" from :func:`parallel_coordinates`, use "color" instead (:issue:`6956`) +- Removed the previously deprecated keywords "verbose" and "private_key" from :func:`read_gbq` (:issue:`30200`) +- Calling ``np.array`` and ``np.asarray`` on tz-aware :class:`Series` and :class:`DatetimeIndex` will now return an object array of tz-aware :class:`Timestamp` (:issue:`24596`) +- + +.. --------------------------------------------------------------------------- -.. _whatsnew_110.performance: +.. 
_whatsnew_100.performance: Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- Performance improvement in :class:`Timedelta` constructor (:issue:`30543`) -- Performance improvement in :class:`Timestamp` constructor (:issue:`30543`) -- -- +- Performance improvement in :class:`DataFrame` arithmetic and comparison operations with scalars (:issue:`24990`, :issue:`29853`) +- Performance improvement in indexing with a non-unique :class:`IntervalIndex` (:issue:`27489`) +- Performance improvement in :attr:`MultiIndex.is_monotonic` (:issue:`27495`) +- Performance improvement in :func:`cut` when ``bins`` is an :class:`IntervalIndex` (:issue:`27668`) +- Performance improvement when initializing a :class:`DataFrame` using a ``range`` (:issue:`30171`) +- Performance improvement in :meth:`DataFrame.corr` when ``method`` is ``"spearman"`` (:issue:`28139`) +- Performance improvement in :meth:`DataFrame.replace` when provided a list of values to replace (:issue:`28099`) +- Performance improvement in :meth:`DataFrame.select_dtypes` by using vectorization instead of iterating over a loop (:issue:`28317`) +- Performance improvement in :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` (:issue:`28795`) +- Performance improvement when comparing a :class:`Categorical` with a scalar and the scalar is not found in the categories (:issue:`29750`) +- Performance improvement when checking if values in a :class:`Categorical` are equal to, greater than or equal to, or greater than a given scalar. + The improvement is not present when checking if the :class:`Categorical` is less than, or less than or equal to, the scalar (:issue:`29820`) +- Performance improvement in :meth:`Index.equals` and :meth:`MultiIndex.equals` (:issue:`29134`) +- Performance improvement in :func:`~pandas.api.types.infer_dtype` when ``skipna`` is ``True`` (:issue:`28814`) .. --------------------------------------------------------------------------- -.. _whatsnew_110.bug_fixes: +.. 
_whatsnew_100.bug_fixes: Bug fixes ~~~~~~~~~ @@ -96,19 +967,61 @@ Bug fixes Categorical ^^^^^^^^^^^ -- -- +- Added test to assert the :func:`fillna` raises the correct ``ValueError`` message when the value isn't a value from categories (:issue:`13628`) +- Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) +- :meth:`DataFrame.reindex` with a :class:`CategoricalIndex` would fail when the targets contained duplicates, and wouldn't fail if the source contained duplicates (:issue:`28107`) +- Bug in :meth:`Categorical.astype` not allowing for casting to extension dtypes (:issue:`28668`) +- Bug where :func:`merge` was unable to join on categorical and extension dtype columns (:issue:`28668`) +- :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now work on unordered categoricals also (:issue:`21667`) +- Added test to assert roundtripping to parquet with :func:`DataFrame.to_parquet` or :func:`read_parquet` will preserve Categorical dtypes for string types (:issue:`27955`) +- Changed the error message in :meth:`Categorical.remove_categories` to always show the invalid removals as a set (:issue:`28669`) +- Using date accessors on a categorical dtyped :class:`Series` of datetimes was not returning an object of the + same type as if one used the :meth:`.str.` / :meth:`.dt.` on a :class:`Series` of that type. E.g. 
when accessing :meth:`Series.dt.tz_localize` on a + :class:`Categorical` with duplicate entries, the accessor was skipping duplicates (:issue:`27952`) +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` that would give incorrect results on categorical data (:issue:`26988`) +- Bug where calling :meth:`Categorical.min` or :meth:`Categorical.max` on an empty Categorical would raise a numpy exception (:issue:`30227`) +- The following methods now also correctly output values for unobserved categories when called through ``groupby(..., observed=False)`` (:issue:`17605`) + * :meth:`core.groupby.SeriesGroupBy.count` + * :meth:`core.groupby.SeriesGroupBy.size` + * :meth:`core.groupby.SeriesGroupBy.nunique` + * :meth:`core.groupby.SeriesGroupBy.nth` + Datetimelike ^^^^^^^^^^^^ - -- Bug in :class:`Timestamp` where constructing :class:`Timestamp` from ambiguous epoch time and calling constructor again changed :meth:`Timestamp.value` property (:issue:`24329`) -- :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` instead of ``TypeError`` (:issue:`30950`) -- Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`) +- Bug in :meth:`Series.__setitem__` incorrectly casting ``np.timedelta64("NaT")`` to ``np.datetime64("NaT")`` when inserting into a :class:`Series` with datetime64 dtype (:issue:`27311`) +- Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`) +- Bug in ``HDFStore.__getitem__`` incorrectly reading tz attribute created in Python 2 (:issue:`26443`) +- Bug in :func:`to_datetime` where passing arrays of malformed ``str`` with errors="coerce" could incorrectly lead to raising ``ValueError`` (:issue:`28299`) +- Bug in 
:meth:`core.groupby.SeriesGroupBy.nunique` where ``NaT`` values were interfering with the count of unique values (:issue:`27951`) +- Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`) +- Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`) +- Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`) +- Bug in :meth:`Series.astype`, :meth:`Index.astype`, and :meth:`DataFrame.astype` failing to handle ``NaT`` when casting to an integer dtype (:issue:`28492`) +- Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`) +- Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`) +- Bug in :func:`core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`) +- Bug in :func:`pandas._config.localization.get_locales` where the ``locales -a`` encodes the locales list as windows-1252 (:issue:`23638`, :issue:`24760`, :issue:`27368`) +- Bug in :meth:`Series.var` failing to raise ``TypeError`` when called with ``timedelta64[ns]`` dtype (:issue:`28289`) +- Bug in :meth:`DatetimeIndex.strftime` and :meth:`Series.dt.strftime` where ``NaT`` was converted to the string ``'NaT'`` instead of ``np.nan`` (:issue:`29578`) +- Bug in masking datetime-like arrays with a boolean mask of an incorrect length not raising an ``IndexError`` (:issue:`30308`) +- Bug in :attr:`Timestamp.resolution` being a property instead of a class attribute (:issue:`29910`) +- Bug in 
:func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`) +- Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`) +- Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`) +- Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`) +- Bug in :meth:`DataFrame.drop` where attempting to drop non-existent values from a DatetimeIndex would yield a confusing error message (:issue:`30399`) +- Bug in :meth:`DataFrame.append` would remove the timezone-awareness of new data (:issue:`30238`) +- Bug in :meth:`Series.cummin` and :meth:`Series.cummax` with timezone-aware dtype incorrectly dropping its timezone (:issue:`15553`) +- Bug in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` where inplace addition and subtraction did not actually operate inplace (:issue:`24115`) +- Bug in :func:`pandas.to_datetime` when called with ``Series`` storing ``IntegerArray`` raising ``TypeError`` instead of returning ``Series`` (:issue:`30050`) +- Bug in :func:`date_range` with custom business hours as ``freq`` and given number of ``periods`` (:issue:`30593`) +- Bug in :class:`PeriodIndex` comparisons with incorrectly casting integers to :class:`Period` objects, inconsistent with the :class:`Period` comparison behavior (:issue:`30722`) +- Bug in :meth:`DatetimeIndex.insert` raising a ``ValueError`` instead of a ``TypeError`` when trying to insert a timezone-aware :class:`Timestamp` into a timezone-naive :class:`DatetimeIndex`, or vice-versa (:issue:`30806`) Timedelta ^^^^^^^^^ - +- Bug in subtracting a :class:`TimedeltaIndex` or :class:`TimedeltaArray` from a ``np.datetime64`` object 
(:issue:`29558`) - - @@ -121,34 +1034,62 @@ Timezones Numeric ^^^^^^^ -- Bug in :meth:`DataFrame.floordiv` with ``axis=0`` not treating division-by-zero like :meth:`Series.floordiv` (:issue:`31271`) -- -- +- Bug in :meth:`DataFrame.quantile` with zero-column :class:`DataFrame` incorrectly raising (:issue:`23925`) +- :class:`DataFrame` flex inequality comparison methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth:`DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) +- Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) +- Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29142`) +- Bug in :meth:`Series.var` not computing the right value with a nullable integer dtype series not passing through ddof argument (:issue:`29128`) +- Improved error message when using `frac` > 1 and `replace` = False (:issue:`27451`) +- Bug in numeric indexes that made it possible to instantiate an :class:`Int64Index`, :class:`UInt64Index`, or :class:`Float64Index` with an invalid dtype (e.g. datetime-like) (:issue:`29539`) +- Bug in :class:`UInt64Index` precision loss while constructing from a list with values in the ``np.uint64`` range (:issue:`29526`) +- Bug in :class:`NumericIndex` construction that caused indexing to fail when integers in the ``np.uint64`` range were used (:issue:`28023`) +- Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be cast to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`) +- Bug in :meth:`Series.interpolate` when using method=`index` with an unsorted index, would previously return incorrect results. 
(:issue:`21037`) +- Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`) +- Bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` when there are duplicated indices (:issue:`30463`) +- Bug in :class:`DataFrame` cumulative operations (e.g. cumsum, cummax) incorrect casting to object-dtype (:issue:`19296`) +- Bug in dtypes being lost in ``DataFrame.__invert__`` (``~`` operator) with mixed dtypes (:issue:`31183`) +- Bug in :class:`~DataFrame.diff` losing the dtype for extension types (:issue:`30889`) +- Bug in :class:`DataFrame.diff` raising an ``IndexError`` when one of the columns was a nullable integer dtype (:issue:`30967`) Conversion ^^^^^^^^^^ -- Bug in :class:`Series` construction from NumPy array with big-endian ``datetime64`` dtype (:issue:`29684`) + - - Strings ^^^^^^^ -- +- Calling :meth:`Series.str.isalnum` (and other "ismethods") on an empty ``Series`` would return an ``object`` dtype instead of ``bool`` (:issue:`29624`) - Interval ^^^^^^^^ -- -- +- Bug in :meth:`IntervalIndex.get_indexer` where a :class:`Categorical` or :class:`CategoricalIndex` ``target`` would incorrectly raise a ``TypeError`` (:issue:`30063`) +- Bug in ``pandas.core.dtypes.cast.infer_dtype_from_scalar`` where passing ``pandas_dtype=True`` did not infer :class:`IntervalDtype` (:issue:`30337`) +- Bug in :class:`Series` constructor where constructing a ``Series`` from a ``list`` of :class:`Interval` objects resulted in ``object`` dtype instead of :class:`IntervalDtype` (:issue:`23563`) +- Bug in :class:`IntervalDtype` where the ``kind`` attribute was incorrectly set as ``None`` instead of ``"O"`` (:issue:`30568`) +- Bug in :class:`IntervalIndex`, :class:`~arrays.IntervalArray`, and :class:`Series` with interval data where equality comparisons were incorrect (:issue:`24112`) Indexing ^^^^^^^^ -- Bug in slicing on a :class:`DatetimeIndex` with a 
partial-timestamp dropping high-resolution indices near the end of a year, quarter, or month (:issue:`31064`) -- Bug in :meth:`PeriodIndex.get_loc` treating higher-resolution strings differently from :meth:`PeriodIndex.get_value` (:issue:`31172`) -- + +- Bug in assignment using a reverse slicer (:issue:`26939`) +- Bug in :meth:`DataFrame.explode` would duplicate frame in the presence of duplicates in the index (:issue:`28010`) +- Bug in reindexing a :meth:`PeriodIndex` with another type of index that contained a `Period` (:issue:`28323`) (:issue:`28337`) +- Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`) +- Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`) +- :meth:`Index.union` could fail when the left contained duplicates (:issue:`28257`) +- Bug when indexing with ``.loc`` where the index was a :class:`CategoricalIndex` with non-string categories didn't work (:issue:`17569`, :issue:`30225`) +- :meth:`Index.get_indexer_non_unique` could fail with ``TypeError`` in some cases, such as when searching for ints in a string index (:issue:`28257`) +- Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`) +- Bug in :meth:`DataFrame.loc` with incorrect dtype when setting Categorical value in 1-row DataFrame (:issue:`25495`) +- :meth:`MultiIndex.get_loc` can't find missing values when input includes missing values (:issue:`19132`) +- Bug in :meth:`Series.__setitem__` incorrectly assigning values with boolean indexer when the length of new data matches the number of ``True`` values and new data is not a ``Series`` or an ``np.array`` (:issue:`30567`) +- Bug in indexing with a :class:`PeriodIndex` incorrectly accepting integers representing years, use e.g. 
``ser.loc["2007"]`` instead of ``ser.loc[2007]`` (:issue:`30763`) Missing ^^^^^^^ @@ -159,60 +1100,138 @@ Missing MultiIndex ^^^^^^^^^^ -- +- Constructor for :class:`MultiIndex` verifies that the given ``sortorder`` is compatible with the actual ``lexsort_depth`` if ``verify_integrity`` parameter is ``True`` (the default) (:issue:`28735`) +- Series and MultiIndex ``.drop`` with ``MultiIndex`` now raise an exception if the labels are not found in the given level (:issue:`8594`) - I/O ^^^ -- Bug in :meth:`read_json` where integer overflow was occuring when json contains big number strings. (:issue:`30320`) -- -- + +- :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`) +- Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`) +- Improve infinity parsing. :meth:`read_csv` now interprets ``Infinity``, ``+Infinity``, ``-Infinity`` as floating point values (:issue:`10065`) +- Bug in :meth:`DataFrame.to_csv` where values were truncated when the length of ``na_rep`` was shorter than the text input data. 
(:issue:`25099`) +- Bug in :func:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`) +- Bug in :meth:`DataFrame.to_json` where a datetime column label would not be written out in ISO format with ``orient="table"`` (:issue:`28130`) +- Bug in :func:`DataFrame.to_parquet` where writing to GCS would fail with `engine='fastparquet'` if the file did not already exist (:issue:`28326`) +- Bug in :func:`read_hdf` closing stores that it didn't open when Exceptions are raised (:issue:`28699`) +- Bug in :meth:`DataFrame.read_json` where using ``orient="index"`` would not maintain the order (:issue:`28557`) +- Bug in :meth:`DataFrame.to_html` where the length of the ``formatters`` argument was not verified (:issue:`28469`) +- Bug in :meth:`DataFrame.read_excel` with ``engine='ods'`` when ``sheet_name`` argument references a non-existent sheet (:issue:`27676`) +- Bug in :meth:`pandas.io.formats.style.Styler` formatting for floating values not displaying decimals correctly (:issue:`13257`) +- Bug in :meth:`DataFrame.to_html` when using ``formatters=`` and ``max_cols`` together. (:issue:`25955`) +- Bug in :meth:`Styler.background_gradient` not able to work with dtype ``Int64`` (:issue:`28869`) +- Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`) +- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`) +- Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`) +- :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. 
(:issue:`29857`) +- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by `record_path` would raise a ``TypeError`` (:issue:`30148`) +- :func:`read_excel` now accepts binary data (:issue:`15914`) +- Bug in :meth:`read_csv` in which encoding handling was limited to just the string `utf-16` for the C engine (:issue:`24130`) Plotting ^^^^^^^^ -- :func:`.plot` for line/bar now accepts color by dictonary (:issue:`8193`). -- +- Bug in :meth:`Series.plot` not able to plot boolean values (:issue:`23719`) +- Bug in :meth:`DataFrame.plot` not able to plot when no rows (:issue:`27758`) +- Bug in :meth:`DataFrame.plot` producing incorrect legend markers when plotting multiple series on the same axis (:issue:`18222`) +- Bug in :meth:`DataFrame.plot` when ``kind='box'`` and data contains datetime or timedelta data. These types are now automatically dropped (:issue:`22799`) +- Bug in :meth:`DataFrame.plot.line` and :meth:`DataFrame.plot.area` produce wrong xlim in x-axis (:issue:`27686`, :issue:`25160`, :issue:`24784`) +- Bug where :meth:`DataFrame.boxplot` would not accept a ``color`` parameter like :meth:`DataFrame.plot.box` (:issue:`26214`) +- Bug in the ``xticks`` argument being ignored for :meth:`DataFrame.plot.bar` (:issue:`14119`) +- :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`) +- :meth:`DataFrame.plot` now allows a ``backend`` keyword argument for switching between backends in one session (:issue:`28619`). +- Bug in color validation incorrectly raising for non-color styles (:issue:`29122`). +- Allow :meth:`DataFrame.plot.scatter` to plot ``objects`` and ``datetime`` type data (:issue:`18755`, :issue:`30391`) +- Bug in :meth:`DataFrame.hist`, ``xrot=0`` does not work with ``by`` and subplots (:issue:`30288`). 
Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate passed in objects (:issue:`30667`) -- Bug in :meth:`DataFrameGroupby.transform` produces incorrect result with transformation functions (:issue:`30918`) +- Bug in :meth:`core.groupby.DataFrameGroupBy.apply` only showing output from a single group when function returns an :class:`Index` (:issue:`28652`) +- Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`) +- Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty :class:`Series` or :class:`DataFrame` (:issue:`28427`) +- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`) +- Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue:`15584`). +- Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue:`19248`). +- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) +- Bug in :meth:`core.groupby.DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`) +- Bug in :meth:`DataFrame.groupby` losing column name information when grouping by a categorical column (:issue:`28787`) +- Remove error raised due to duplicated input functions in named aggregation in :meth:`DataFrame.groupby` and :meth:`Series.groupby`. Previously error will be raised if the same function is applied on the same column and now it is allowed if new assigned names are different. 
(:issue:`28426`) +- :meth:`core.groupby.SeriesGroupBy.value_counts` will be able to handle the case even when the :class:`Grouper` makes empty groups (:issue:`28479`) +- Bug in :meth:`core.window.rolling.Rolling.quantile` ignoring ``interpolation`` keyword argument when used within a groupby (:issue:`28779`) +- Bug in :meth:`DataFrame.groupby` where ``any``, ``all``, ``nunique`` and transform functions would incorrectly handle duplicate column labels (:issue:`21668`) +- Bug in :meth:`core.groupby.DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`) +- Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`) +- Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) +- Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) +- Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`) +- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument where ``min_periods`` was ignored (:issue:`26996`) Reshaping ^^^^^^^^^ -- Bug effecting all numeric and boolean reduction methods not returning subclassed data type. (:issue:`25596`) -- Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`) -- Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` can take tuple names in MultiIndexed data (:issue:`19966`) -- Bug in :meth:`DataFrame.pivot_table` when ``margin`` is ``True`` and only ``column`` is defined (:issue:`31016`) -- Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`) -- Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. 
(:issue:`18321`) -- Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`) +- Bug in :meth:`DataFrame.apply` that caused incorrect output with empty :class:`DataFrame` (:issue:`28202`, :issue:`21959`) +- Bug in :meth:`DataFrame.stack` not handling non-unique indexes correctly when creating MultiIndex (:issue:`28301`) +- Bug in :meth:`pivot_table` not returning correct type ``float`` when ``margins=True`` and ``aggfunc='mean'`` (:issue:`24893`) +- Bug :func:`merge_asof` could not use :class:`datetime.timedelta` for ``tolerance`` kwarg (:issue:`28098`) +- Bug in :func:`merge`, did not append suffixes correctly with MultiIndex (:issue:`28518`) +- :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`) +- Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`). +- Better error message in :func:`get_dummies` when `columns` isn't a list-like value (:issue:`28383`) +- Bug in :meth:`Index.join` that caused infinite recursion error for mismatched ``MultiIndex`` name orders. 
(:issue:`25760`, :issue:`28956`) +- Bug :meth:`Series.pct_change` where supplying an anchored frequency would throw a ``ValueError`` (:issue:`28664`) +- Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`) +- Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`) +- Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`) +- Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`) +- Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) +- Improved error message and docstring in :func:`cut` and :func:`qcut` when `labels=True` (:issue:`13318`) +- Bug in missing `fill_na` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`) Sparse ^^^^^^ - -- -- +- Bug in :class:`SparseDataFrame` arithmetic operations incorrectly casting inputs to float (:issue:`28107`) +- Bug in ``DataFrame.sparse`` returning a ``Series`` when there was a column named ``sparse`` rather than the accessor (:issue:`30758`) +- Fixed :meth:`operator.xor` with a boolean-dtype ``SparseArray``. Now returns a sparse result, rather than object dtype (:issue:`31025`) ExtensionArray ^^^^^^^^^^^^^^ -- -- +- Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`). 
+- Bug where nullable integers could not be compared to strings (:issue:`28930`) +- Bug where :class:`DataFrame` constructor raised ``ValueError`` with list-like data and ``dtype`` specified (:issue:`30280`) +- Bug in dtype being lost in ``__invert__`` (``~`` operator) for extension-array backed ``Series`` and ``DataFrame`` (:issue:`23087`) Other ^^^^^ -- Appending a dictionary to a :class:`DataFrame` without passing ``ignore_index=True`` will raise ``TypeError: Can only append a dict if ignore_index=True`` - instead of ``TypeError: Can only append a Series if ignore_index=True or if the Series has a name`` (:issue:`30871`) -- +- Trying to set the ``display.precision``, ``display.max_rows`` or ``display.max_columns`` using :meth:`set_option` to anything but a ``None`` or a positive int will raise a ``ValueError`` (:issue:`23348`) +- Using :meth:`DataFrame.replace` with overlapping keys in a nested dictionary will no longer raise, now matching the behavior of a flat dictionary (:issue:`27660`) +- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`) +- Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`) +- :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`) +- Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`) +- Backtick quoting in :meth:`DataFrame.query` and :meth:`DataFrame.eval` can now also be used to use invalid identifiers like names that start with a digit, are python keywords, or are using single character operators. 
(:issue:`27017`) +- Bug in ``pd.core.util.hashing.hash_pandas_object`` where arrays containing tuples were incorrectly treated as non-hashable (:issue:`28969`) +- Bug in :meth:`DataFrame.append` that raised ``IndexError`` when appending with empty list (:issue:`28769`) +- Fix :class:`AbstractHolidayCalendar` to return correct results for + years after 2030 (now goes up to 2200) (:issue:`27790`) +- Fixed :class:`~arrays.IntegerArray` returning ``inf`` rather than ``NaN`` for operations dividing by ``0`` (:issue:`27398`) +- Fixed ``pow`` operations for :class:`~arrays.IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`) +- Bug in :meth:`Series.count` raises if use_inf_as_na is enabled (:issue:`29478`) +- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:`29069`) +- Bug in :class:`DataFrame` constructor when passing a 2D ``ndarray`` and an extension dtype (:issue:`12513`) +- Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`) +- Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`) +- Handle nested NumPy ``object`` arrays in :func:`testing.assert_series_equal` for ExtensionArray implementations (:issue:`30841`) +- Bug in :class:`Index` constructor incorrectly allowing 2-dimensional input arrays (:issue:`13601`, :issue:`27125`) .. --------------------------------------------------------------------------- -.. _whatsnew_110.contributors: +.. _whatsnew_100.contributors: Contributors ~~~~~~~~~~~~ + +.. 
contributors:: v0.25.3..v1.0.0rc0 From 0b979f9455371e2ea320e0bec75e3cd20805e451 Mon Sep 17 00:00:00 2001 From: rjfs Date: Sat, 7 Mar 2020 21:54:52 +0100 Subject: [PATCH 11/15] restructured test --- .../tests/frame/test_axis_select_reindex.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 0bc67e9e41ee7..3e2fc25628fa9 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -1159,15 +1159,17 @@ def test_drop_non_empty_list(self, index, drop_labels): @pytest.mark.parametrize("inplace", [False, True]) def test_inplace_drop_and_operation(self, operation, inplace): # GH 30484 - data_dict = {"x": [1, 2, 3, 4, 5], "y": [10, 20, 30, 40, 50]} - df = pd.DataFrame(data_dict) + df = pd.DataFrame({"x": range(5)}) + expected = df.copy() + df["y"] = range(5) y = df["y"] - if inplace: - df.drop("y", axis=1, inplace=inplace) - else: - df = df.drop("y", axis=1, inplace=inplace) - # Perform operation and ensure that df is not changed - expected = df.copy() - getattr(y, operation)(1) - tm.assert_frame_equal(df, expected) + with tm.assert_produces_warning(None): + if inplace: + df.drop("y", axis=1, inplace=inplace) + else: + df = df.drop("y", axis=1, inplace=inplace) + + # Perform operation and check result + getattr(y, operation)(1) + tm.assert_frame_equal(df, expected) From 53cd24a757477f24d873876efae6b565ed3c05d1 Mon Sep 17 00:00:00 2001 From: rjfs Date: Sat, 14 Mar 2020 17:33:32 +0100 Subject: [PATCH 12/15] Update v1.1.0.rst --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5b6f70be478c2..4a5282cbb4c45 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -335,6 +335,7 @@ Reshaping - Bug in :func:`concat` where the resulting indices are not copied when 
``copy=True`` (:issue:`29879`) - :meth:`Series.append` will now raise a ``TypeError`` when passed a DataFrame or a sequence containing Dataframe (:issue:`31413`) - :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`) +- Bug in in-place operations on a :class:`Series` that added a column back to the :class:`DataFrame` it was originally dropped from (using ``inplace=True``) (:issue:`30484`) Sparse From 76131c058dc8f7153c8288c6abe97fb964f01697 Mon Sep 17 00:00:00 2001 From: rafasan Date: Thu, 26 Mar 2020 10:25:24 +0100 Subject: [PATCH 13/15] merged with master --- pandas/tests/frame/test_axis_select_reindex.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 0f04bd85199bf..556565fb53ed3 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -1,8 +1,11 @@ from datetime import datetime +import re import numpy as np import pytest +from pandas.errors import PerformanceWarning + import pandas as pd from pandas import Categorical, DataFrame, Index, MultiIndex, Series, date_range, isna import pandas._testing as tm @@ -776,4 +779,4 @@ def test_inplace_drop_and_operation(self, operation, inplace): # Perform operation and check result getattr(y, operation)(1) - tm.assert_frame_equal(df, expected) \ No newline at end of file + tm.assert_frame_equal(df, expected) From 560651948208a64e16d993b18d33ec97b9d15914 Mon Sep 17 00:00:00 2001 From: rafasan Date: Thu, 26 Mar 2020 10:56:57 +0100 Subject: [PATCH 14/15] merge with master --- .../tests/frame/test_axis_select_reindex.py | 45 +------------------ 1 file changed, 1 insertion(+), 44 deletions(-) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 
556565fb53ed3..7fbbcfcf44223 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -1,11 +1,8 @@ -from datetime import datetime -import re +from datetime import datetime import numpy as np import pytest -from pandas.errors import PerformanceWarning - import pandas as pd from pandas import Categorical, DataFrame, Index, MultiIndex, Series, date_range, isna import pandas._testing as tm @@ -720,46 +717,6 @@ def test_reindex_multi_categorical_time(self): expected = pd.DataFrame({"a": [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx) tm.assert_frame_equal(result, expected) - data = [[1, 2, 3], [1, 2, 3]] - - @pytest.mark.parametrize( - "actual", - [ - DataFrame(data=data, index=["a", "a"]), - DataFrame(data=data, index=["a", "b"]), - DataFrame(data=data, index=["a", "b"]).set_index([0, 1]), - DataFrame(data=data, index=["a", "a"]).set_index([0, 1]), - ], - ) - def test_raise_on_drop_duplicate_index(self, actual): - - # issue 19186 - level = 0 if isinstance(actual.index, MultiIndex) else None - msg = re.escape("\"['c'] not found in axis\"") - with pytest.raises(KeyError, match=msg): - actual.drop("c", level=level, axis=0) - with pytest.raises(KeyError, match=msg): - actual.T.drop("c", level=level, axis=1) - expected_no_err = actual.drop("c", axis=0, level=level, errors="ignore") - tm.assert_frame_equal(expected_no_err, actual) - expected_no_err = actual.T.drop("c", axis=1, level=level, errors="ignore") - tm.assert_frame_equal(expected_no_err.T, actual) - - @pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 2]]) - @pytest.mark.parametrize("drop_labels", [[], [1], [2]]) - def test_drop_empty_list(self, index, drop_labels): - # GH 21494 - expected_index = [i for i in index if i not in drop_labels] - frame = pd.DataFrame(index=index).drop(drop_labels) - tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index)) - - @pytest.mark.parametrize("index", [[1, 2, 3], [1, 2, 2]]) - 
@pytest.mark.parametrize("drop_labels", [[1, 4], [4, 5]]) - def test_drop_non_empty_list(self, index, drop_labels): - # GH 21494 - with pytest.raises(KeyError, match="not found in axis"): - pd.DataFrame(index=index).drop(drop_labels) - @pytest.mark.parametrize( "operation", ["__iadd__", "__isub__", "__imul__", "__ipow__"] ) From 9bdf45426bbe2b4684f0dcc317d2563327ffb0a2 Mon Sep 17 00:00:00 2001 From: rafasan Date: Thu, 26 Mar 2020 11:02:33 +0100 Subject: [PATCH 15/15] merge with master --- pandas/tests/frame/test_axis_select_reindex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 7fbbcfcf44223..d1d55d38f4a9a 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime import numpy as np import pytest