diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c5bf943cebca71..a29a27afe9fdab 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -811,6 +811,7 @@ Removal of prior version deprecations/changes in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). - The deprecated ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) - The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`) +- ``Categorical`` has dropped support for ``NaN`` categories (:issue:`10748`) - The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) - ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`) - Where clauses in ``pytables`` are only accepted as strings and expressions types and not other data-types (:issue:`12027`) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 0e58c18631588c..c34dea4145e8c2 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -545,18 +545,11 @@ def _validate_categories(cls, categories, fastpath=False): if not fastpath: - # check properties of the categories - # we don't allow NaNs in the categories themselves - + # Categories cannot contain NaN. if categories.hasnans: - # NaNs in cats deprecated in 0.17 - # GH 10748 - msg = ('\nSetting NaNs in `categories` is deprecated and ' - 'will be removed in a future version of pandas.') - warn(msg, FutureWarning, stacklevel=3) - - # categories must be unique + raise ValueError('Categorical categories cannot be NaN') + # Categories must be unique. 
if not categories.is_unique: raise ValueError('Categorical categories must be unique') diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 6c8aeba704c7be..4d5e62af5cbfe9 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -222,14 +222,6 @@ def f(): cat = pd.Categorical([np.nan, 1., 2., 3.]) self.assertTrue(is_float_dtype(cat.categories)) - # Deprecating NaNs in categoires (GH #10748) - # preserve int as far as possible by converting to object if NaN is in - # categories - with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([np.nan, 1, 2, 3], - categories=[np.nan, 1, 2, 3]) - self.assertTrue(is_object_dtype(cat.categories)) - # This doesn't work -> this would probably need some kind of "remember # the original type" feature to try to cast the array interface result # to... @@ -418,6 +410,12 @@ def f(): self.assertRaises(ValueError, f) + # NaN categories included + def f(): + Categorical.from_codes([0, 1, 2], ["a", "b", np.nan]) + + self.assertRaises(ValueError, f) + # too negative def f(): Categorical.from_codes([-2, 1, 2], ["a", "b", "c"])