From b5ab7f2202a26014d81092fa370e847e18273b19 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Wed, 27 May 2020 18:26:50 +0100
Subject: [PATCH 01/47] Categorical (to|from)_dummies methods

Simple implementation for converting between dummy variables and
Categoricals.
---
 pandas/core/arrays/categorical.py | 109 +++++++++++++++++++++++++++++-
 1 file changed, 108 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index ef69d6565cfeb..6847224ccfc8d 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2,7 +2,7 @@
 from functools import partial
 import operator
 from shutil import get_terminal_size
-from typing import Dict, Hashable, List, Type, Union, cast
+from typing import TYPE_CHECKING, Dict, Hashable, List, Type, Union, cast
 from warnings import warn
 
 import numpy as np
@@ -55,6 +55,9 @@
 
 from pandas.io.formats import console
 
+if TYPE_CHECKING:
+    from pandas._typing import DataFrame  # noqa: F401
+
 
 def _cat_compare_op(op):
     opname = f"__{op.__name__}__"
@@ -370,6 +373,110 @@ def __init__(
         self._dtype = self._dtype.update_dtype(dtype)
         self._codes = coerce_indexer_dtype(codes, dtype.categories)
 
+    @classmethod
+    def from_dummies(cls, dummies: "DataFrame", ordered=None):
+        """
+        Create a `Categorical` using a ``DataFrame`` encoding those categories
+        as dummy/ one-hot encoded variables.
+
+        The ``DataFrame`` must be coercible to boolean,
+        and have no more than one truthy value per row.
+        The columns of the ``DataFrame`` become the categories of the `Categorical`.
+        A column whose header is NA will be dropped.
+
+        Parameters
+        ----------
+            dummies : DataFrame of bool-like
+            ordered : bool
+                Whether or not this Categorical is ordered.
+
+        Raises
+        ------
+            ValueError
+                If a sample belongs to >1 category
+
+        Returns
+        -------
+        Categorical
+
+        Examples
+        --------
+        >>> df = pd.DataFrame(
+        ... [[1, 0, 0], [0, 1, 0], [0, 0, 1]], columns=["a", "b", "c"]
+        ... )
+        >>> Categorical.from_dummies(df)
+        [a, b, c]
+        Categories (3, object): [a, b, c]
+        """
+        # GH 8745
+        from pandas import Series
+
+        df = dummies.drop(columns=np.nan, errors="ignore").astype(bool)
+
+        if (df.sum(axis=1) > 1).any():
+            raise ValueError("Some rows belong to >1 category")
+
+        index_into = Series([np.nan] + list(df.columns))
+        mult_by = np.arange(1, len(index_into))
+
+        codes = (df.astype(int) * mult_by).sum(axis=1) - 1
+        codes[codes.isna()] = -1
+        return cls.from_codes(codes, df.columns.values, ordered=ordered)
+
+    def to_dummies(self, na_column=None) -> "DataFrame":
+        """
+        Create a ``DataFrame`` representing this `Categorical`
+        as dummy/ one-hot encoded variables.
+
+        For more power over column names or to use a sparse matrix,
+        see :func:`pandas.get_dummies`.
+
+        Parameters
+        ----------
+            na_column : Optional
+                If None, NA values will be represented as a row of zeros.
+                Otherwise, this is the name of a new column representing
+                those NA values.
+
+        Returns
+        -------
+        DataFrame
+
+        Examples
+        --------
+        >>> Categorical(["a", "b", "c"]).to_dummies()
+           a      b      c
+        0  True   False  False
+        1  False  True   False
+        2  False  False  True
+
+        >>> Categorical(["a", "b", np.nan]).to_dummies()
+           a      b
+        0  True   False
+        1  False  True
+        2  False  False
+
+        >>> Categorical(["a", "b", np.nan]).to_dummies("c")
+           a      b      c
+        0  True   False  False
+        1  False  True   False
+        2  False  False  True
+
+        See Also
+        --------
+        :func:`pandas.get_dummies`
+        """
+        from pandas import DataFrame, CategoricalIndex
+
+        eye = np.eye(len(self.categories) + 1, dtype=bool)
+        arr = eye[self.codes, :]
+
+        if na_column is None:
+            return DataFrame(arr[:, :-1], columns=CategoricalIndex(self.categories))
+        else:
+            cat_lst = list(self.categories) + [na_column]
+            return DataFrame(arr, columns=CategoricalIndex(cat_lst))
+
     @property
     def dtype(self) -> CategoricalDtype:
         """

From f937c96a5d61bbe7369284d5111a4403d9e045b8 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 28 May 2020 11:02:52 +0100
Subject: [PATCH 02/47] Tests: Categorical.(to|from)_dummies

---
 pandas/tests/arrays/categorical/test_api.py   | 15 +++++-
 .../arrays/categorical/test_constructors.py   | 51 +++++++++++++++++++
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py
index 6fce4b4145ff2..8e435f47e84ea 100644
--- a/pandas/tests/arrays/categorical/test_api.py
+++ b/pandas/tests/arrays/categorical/test_api.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pytest
 
-from pandas import Categorical, CategoricalIndex, DataFrame, Index, Series
+from pandas import Categorical, CategoricalIndex, DataFrame, Index, Series, get_dummies
 import pandas._testing as tm
 from pandas.core.arrays.categorical import recode_for_categories
 from pandas.tests.arrays.categorical.common import TestCategorical
@@ -399,6 +399,19 @@ def test_remove_unused_categories(self):
         out = cat.remove_unused_categories()
         assert out.tolist() == val.tolist()
 
+    @pytest.mark.parametrize(
+        "vals",
+        [
+            ["a", "b", "b", "a"],
+            ["a", "b", "b", "a", np.nan],
+            [1, 1.5, "a", (1, "b")],
+            [1, 1.5, "a", (1, "b"), np.nan],
+        ],
+    )
+    def test_to_dummies(self, vals):
+        cats = Categorical(vals)
+        tm.assert_equal(cats.to_dummies(), get_dummies(cats).astype(bool))
+
 
 class TestCategoricalAPIWithFactor(TestCategorical):
     def test_describe(self):
diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
index e200f13652a84..b296838290360 100644
--- a/pandas/tests/arrays/categorical/test_constructors.py
+++ b/pandas/tests/arrays/categorical/test_constructors.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+from pandas.compat.numpy import _np_version_under1p16
+
 from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype
 from pandas.core.dtypes.dtypes import CategoricalDtype
 
@@ -10,6 +12,7 @@
 from pandas import (
     Categorical,
     CategoricalIndex,
+    DataFrame,
     DatetimeIndex,
     Index,
     Interval,
@@ -19,6 +22,7 @@
     Series,
     Timestamp,
     date_range,
+    get_dummies,
     period_range,
     timedelta_range,
 )
@@ -635,6 +639,7 @@ def test_constructor_imaginary(self):
         tm.assert_index_equal(c1.categories, Index(values))
         tm.assert_numpy_array_equal(np.array(c1), np.array(values))
 
+    @pytest.mark.skipif(_np_version_under1p16, reason="Skipping for NumPy <1.16")
     def test_constructor_string_and_tuples(self):
         # GH 21416
         c = pd.Categorical(np.array(["c", ("a", "b"), ("b", "a"), "c"], dtype=object))
@@ -682,3 +687,49 @@ def test_interval(self):
         expected_codes = np.array([0, 1], dtype="int8")
         tm.assert_numpy_array_equal(cat.codes, expected_codes)
         tm.assert_index_equal(cat.categories, idx)
+
+    def test_from_dummies(self):
+        # GH 8745
+        raw = ["a", "a", "b", "c", "c", "a"]
+        dummies = get_dummies(raw)
+        cats = Categorical.from_dummies(dummies)
+        assert list(cats) == raw
+
+    def test_from_dummies_nan(self):
+        raw = ["a", "a", "b", "c", "c", "a", np.nan]
+        dummies = get_dummies(raw)
+        cats = Categorical.from_dummies(dummies)
+        assert list(cats)[:-1] == raw[:-1]
+        assert pd.isna(list(cats)[-1])
+
+    def test_from_dummies_gt1(self):
+        dummies = DataFrame([[1, 0, 1], [0, 1, 0], [0, 0, 1]], columns=["a", "b", "c"])
+        with pytest.raises(ValueError):
+            Categorical.from_dummies(dummies)
+
+    @pytest.mark.parametrize("ordered", [None, False, True])
+    def test_from_dummies_ordered(self, ordered):
+        raw = ["a", "a", "b", "c", "c", "a"]
+        dummies = get_dummies(raw)
+        cats = Categorical.from_dummies(dummies, ordered)
+        assert cats.ordered == bool(ordered)
+
+    def test_from_dummies_types(self):
+        cols = ["a", 1, 1.5, ("a", "b"), (1, "c")]
+        dummies = DataFrame(np.eye(len(cols)), columns=cols)
+        cats = Categorical.from_dummies(dummies)
+        assert list(cats) == cols
+
+    def test_from_dummies_drops_na(self):
+        cols = ["a", "b", np.nan]
+        dummies = DataFrame(np.eye(len(cols)), columns=cols)
+        cats = Categorical.from_dummies(dummies)
+        assert list(cats.categories) == cols[:-1]
+        assert pd.isna(cats[-1])
+
+    def test_from_dummies_multiindex(self):
+        tups = [("a", 1), ("a", 2), ("b", 1), ("b", 2)]
+        cols = MultiIndex.from_tuples(tups)
+        dummies = DataFrame(np.eye(len(cols)), columns=cols)
+        cats = Categorical.from_dummies(dummies)
+        assert list(cats.categories) == tups

From dd141320549ba4e2ee3cfbdee5cd1903ba0ca5bb Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 28 May 2020 11:06:54 +0100
Subject: [PATCH 03/47] Add reference to Categorical.to_dummies to get_dummies

---
 pandas/core/reshape/reshape.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 18ebe14763797..e848f968b64e4 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -768,6 +768,7 @@ def get_dummies(
     See Also
     --------
     Series.str.get_dummies : Convert Series to dummy codes.
+    Categorical.to_dummies : Simply create dummy variables from a Categorical.
 
     Examples
     --------

From 9dc9da5b7f333ffc5f5e85615f1052e9a199931d Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 28 May 2020 14:08:32 +0100
Subject: [PATCH 04/47] whatsnew: add issue number to
 Categorical.(to|from)_dummies

---
 doc/source/whatsnew/v1.1.0.rst | 27 ++++-----------------------
 1 file changed, 4 insertions(+), 23 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index a49b29d691692..f2df6579ced70 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -318,29 +318,10 @@ Other enhancements
   compression library. Compression was also added to the low-level Stata-file writers
   :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`,
   and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`26599`).
-- :meth:`HDFStore.put` now accepts a ``track_times`` parameter. This parameter is passed to the ``create_table`` method of ``PyTables`` (:issue:`32682`).
-- :meth:`Series.plot` and :meth:`DataFrame.plot` now accepts `xlabel` and `ylabel` parameters to present labels on x and y axis (:issue:`9093`).
-- Made :class:`pandas.core.window.rolling.Rolling` and :class:`pandas.core.window.expanding.Expanding` iterable（:issue:`11704`)
-- Made ``option_context`` a :class:`contextlib.ContextDecorator`, which allows it to be used as a decorator over an entire function (:issue:`34253`).
-- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now accept an ``errors`` argument (:issue:`22610`)
-- :meth:`~pandas.core.groupby.DataFrameGroupBy.groupby.transform` now allows ``func`` to be ``pad``, ``backfill`` and ``cumcount`` (:issue:`31269`).
-- :func:`read_json` now accepts an ``nrows`` parameter. (:issue:`33916`).
-- :meth:`DataFrame.hist`, :meth:`Series.hist`, :meth:`core.groupby.DataFrameGroupBy.hist`, and :meth:`core.groupby.SeriesGroupBy.hist` have gained the ``legend`` argument. Set to True to show a legend in the histogram. (:issue:`6279`)
-- :func:`concat` and :meth:`~DataFrame.append` now preserve extension dtypes, for example
-  combining a nullable integer column with a numpy integer column will no longer
-  result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`, :issue:`34095`).
-- :func:`read_gbq` now allows to disable progress bar (:issue:`33360`).
-- :func:`read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`).
-- :meth:`DataFrame.cov` and :meth:`Series.cov` now support a new parameter ``ddof`` to support delta degrees of freedom as in the corresponding numpy methods (:issue:`34611`).
-- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`).
-- :meth:`DataFrame.to_excel` can now also write OpenOffice spreadsheet (.ods) files (:issue:`27222`)
-- :meth:`~Series.explode` now accepts ``ignore_index`` to reset the index, similar to :meth:`pd.concat` or :meth:`DataFrame.sort_values` (:issue:`34932`).
-- :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` now accept ``index`` argument as an alias for tabulate's ``showindex`` (:issue:`32667`)
-- :meth:`read_csv` now accepts string values like "0", "0.0", "1", "1.0" as convertible to the nullable Boolean dtype (:issue:`34859`)
-- :class:`pandas.core.window.ExponentialMovingWindow` now supports a ``times`` argument that allows ``mean`` to be calculated with observations spaced by the timestamps in ``times`` (:issue:`34839`)
-- :meth:`DataFrame.agg` and :meth:`Series.agg` now accept named aggregation for renaming the output columns/indexes. (:issue:`26513`)
-- ``compute.use_numba`` now exists as a configuration option that utilizes the numba engine when available (:issue:`33966`, :issue:`35374`)
-- :meth:`Series.plot` now supports asymmetric error bars. Previously, if :meth:`Series.plot` received a "2xN" array with error values for `yerr` and/or `xerr`, the left/lower values (first row) were mirrored, while the right/upper values (second row) were ignored. Now, the first row represents the left/lower error values and the second row the right/upper error values. (:issue:`9536`)
+- :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`).
+- Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable（:issue:`11704`)
+- Make ``option_context`` a :class:`contextlib.ContextDecorator`, which allows it to be used as a decorator over an entire function (:issue:`34253`).
+- :class:`~pandas.core.arrays.categorical.Categorical` now has methods for converting to and from dummy/ one-hot encoded variables: :meth:`Categorical.to_dummies` and :meth:`Categorical.from_dummies` respectively. :meth:`Categorical.to_dummies` is smaller in scope than :func:`~pandas.core.reshape.reshape.get_dummies`, which can still be used if you require the extra flexibility.
 
 .. ---------------------------------------------------------------------------
 

From ac9cec26bad2959dda4e3592bacde87efaee9436 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 28 May 2020 14:15:39 +0100
Subject: [PATCH 05/47] Review comments for dummies tests

---
 pandas/tests/arrays/categorical/test_api.py          | 1 +
 pandas/tests/arrays/categorical/test_constructors.py | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py
index 8e435f47e84ea..b65cabe735c5b 100644
--- a/pandas/tests/arrays/categorical/test_api.py
+++ b/pandas/tests/arrays/categorical/test_api.py
@@ -409,6 +409,7 @@ def test_remove_unused_categories(self):
         ],
     )
     def test_to_dummies(self, vals):
+        # GH 8745
         cats = Categorical(vals)
         tm.assert_equal(cats.to_dummies(), get_dummies(cats).astype(bool))
 
diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
index b296838290360..44ee6eddae628 100644
--- a/pandas/tests/arrays/categorical/test_constructors.py
+++ b/pandas/tests/arrays/categorical/test_constructors.py
@@ -696,6 +696,7 @@ def test_from_dummies(self):
         assert list(cats) == raw
 
     def test_from_dummies_nan(self):
+        # GH 8745
         raw = ["a", "a", "b", "c", "c", "a", np.nan]
         dummies = get_dummies(raw)
         cats = Categorical.from_dummies(dummies)
@@ -703,24 +704,28 @@ def test_from_dummies_nan(self):
         assert pd.isna(list(cats)[-1])
 
     def test_from_dummies_gt1(self):
+        # GH 8745
         dummies = DataFrame([[1, 0, 1], [0, 1, 0], [0, 0, 1]], columns=["a", "b", "c"])
         with pytest.raises(ValueError):
             Categorical.from_dummies(dummies)
 
     @pytest.mark.parametrize("ordered", [None, False, True])
     def test_from_dummies_ordered(self, ordered):
+        # GH 8745
         raw = ["a", "a", "b", "c", "c", "a"]
         dummies = get_dummies(raw)
         cats = Categorical.from_dummies(dummies, ordered)
         assert cats.ordered == bool(ordered)
 
     def test_from_dummies_types(self):
+        # GH 8745
         cols = ["a", 1, 1.5, ("a", "b"), (1, "c")]
         dummies = DataFrame(np.eye(len(cols)), columns=cols)
         cats = Categorical.from_dummies(dummies)
         assert list(cats) == cols
 
     def test_from_dummies_drops_na(self):
+        # GH 8745
         cols = ["a", "b", np.nan]
         dummies = DataFrame(np.eye(len(cols)), columns=cols)
         cats = Categorical.from_dummies(dummies)
@@ -728,6 +733,7 @@ def test_from_dummies_drops_na(self):
         assert pd.isna(cats[-1])
 
     def test_from_dummies_multiindex(self):
+        # GH 8745
         tups = [("a", 1), ("a", 2), ("b", 1), ("b", 2)]
         cols = MultiIndex.from_tuples(tups)
         dummies = DataFrame(np.eye(len(cols)), columns=cols)

From 0459cb130207d3f0eac7d2fa625ba94e553e6769 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 28 May 2020 14:16:18 +0100
Subject: [PATCH 06/47] Review comments for dummies implementation

---
 pandas/core/arrays/categorical.py | 39 ++++++++++++++-----------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 6847224ccfc8d..237d4f582a4f8 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2,7 +2,7 @@
 from functools import partial
 import operator
 from shutil import get_terminal_size
-from typing import TYPE_CHECKING, Dict, Hashable, List, Type, Union, cast
+from typing import TYPE_CHECKING, Dict, Hashable, List, Optional, Type, Union, cast
 from warnings import warn
 
 import numpy as np
@@ -374,26 +374,27 @@ def __init__(
         self._codes = coerce_indexer_dtype(codes, dtype.categories)
 
     @classmethod
-    def from_dummies(cls, dummies: "DataFrame", ordered=None):
-        """
-        Create a `Categorical` using a ``DataFrame`` encoding those categories
-        as dummy/ one-hot encoded variables.
+    def from_dummies(
+        cls, dummies: "DataFrame", ordered: Optional[bool] = None
+    ) -> "Categorical":
+        """Create a `Categorical` using a ``DataFrame`` of dummy variables.
 
         The ``DataFrame`` must be coercible to boolean,
         and have no more than one truthy value per row.
         The columns of the ``DataFrame`` become the categories of the `Categorical`.
-        A column whose header is NA will be dropped.
+        A column whose header is NA will be dropped;
+        any row with a NA value will be uncategorised.
 
         Parameters
         ----------
-            dummies : DataFrame of bool-like
-            ordered : bool
-                Whether or not this Categorical is ordered.
+        dummies : DataFrame of bool-like
+        ordered : bool
+            Whether or not this Categorical is ordered.
 
         Raises
         ------
-            ValueError
-                If a sample belongs to >1 category
+        ValueError
+            If a sample belongs to >1 category
 
         Returns
         -------
@@ -409,15 +410,12 @@ def from_dummies(cls, dummies: "DataFrame", ordered=None):
         Categories (3, object): [a, b, c]
         """
         # GH 8745
-        from pandas import Series
-
         df = dummies.drop(columns=np.nan, errors="ignore").astype(bool)
 
         if (df.sum(axis=1) > 1).any():
             raise ValueError("Some rows belong to >1 category")
 
-        index_into = Series([np.nan] + list(df.columns))
-        mult_by = np.arange(1, len(index_into))
+        mult_by = np.arange(1, df.shape[1] + 1)
 
         codes = (df.astype(int) * mult_by).sum(axis=1) - 1
         codes[codes.isna()] = -1
@@ -425,18 +423,17 @@ def from_dummies(cls, dummies: "DataFrame", ordered=None):
 
     def to_dummies(self, na_column=None) -> "DataFrame":
         """
-        Create a ``DataFrame`` representing this `Categorical`
-        as dummy/ one-hot encoded variables.
+        Create a ``DataFrame`` of boolean dummy variables representing this object.
 
         For more power over column names or to use a sparse matrix,
         see :func:`pandas.get_dummies`.
 
         Parameters
         ----------
-            na_column : Optional
-                If None, NA values will be represented as a row of zeros.
-                Otherwise, this is the name of a new column representing
-                those NA values.
+        na_column : Optional
+            If None, NA values will be represented as a row of zeros.
+            Otherwise, this is the name of a new column representing
+            those NA values.
 
         Returns
         -------

From 65e68c26c61374d26078b3a419f81e72ae62d3cd Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Fri, 29 May 2020 10:04:13 +0100
Subject: [PATCH 07/47] dummies review comments

---
 pandas/core/arrays/categorical.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 237d4f582a4f8..1e8f59b0797c6 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -409,15 +409,17 @@ def from_dummies(
         [a, b, c]
         Categories (3, object): [a, b, c]
         """
-        # GH 8745
         df = dummies.drop(columns=np.nan, errors="ignore").astype(bool)
 
         if (df.sum(axis=1) > 1).any():
             raise ValueError("Some rows belong to >1 category")
 
-        mult_by = np.arange(1, df.shape[1] + 1)
-
-        codes = (df.astype(int) * mult_by).sum(axis=1) - 1
+        mult_by = np.arange(df.shape[1]) + 1
+        #  000            000    0   -1
+        #  010            020    2    1
+        #  001 * 1,2,3 => 003 -> 3 -> 2 = correct codes
+        #  100            100    1    0
+        codes = (df * mult_by).sum(axis=1) - 1
         codes[codes.isna()] = -1
         return cls.from_codes(codes, df.columns.values, ordered=ordered)
 
@@ -453,8 +455,8 @@ def to_dummies(self, na_column=None) -> "DataFrame":
         1  False  True
         2  False  False
 
-        >>> Categorical(["a", "b", np.nan]).to_dummies("c")
-           a      b      c
+        >>> Categorical(["a", "b", np.nan]).to_dummies("other")
+           a      b      other
         0  True   False  False
         1  False  True   False
         2  False  False  True

From 133402619cd57bdb1e0dfa66d466aff8f52c9b42 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Fri, 29 May 2020 10:40:31 +0100
Subject: [PATCH 08/47] User guide: Describe Categorical.(to|from)_dummies

---
 doc/source/user_guide/categorical.rst | 42 +++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
index b7475ae7bb132..e5203ef8d3c3f 100644
--- a/doc/source/user_guide/categorical.rst
+++ b/doc/source/user_guide/categorical.rst
@@ -127,6 +127,48 @@ This conversion is likewise done column by column:
     df_cat['A']
     df_cat['B']
 
+Dummy / indicator / one-hot encoded variables
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some operations, like regression and classification,
+encodes a single categorical variable as a column for each category,
+with each row having False in all but one column (True).
+These are called dummy variables, or one-hot encoding.
+:class:`pandas.Categorical`s can easily be converted to and from such an encoding:
+
+.. ipython:: python
+
+    cat = pd.Categorical(["a", "b", "b", "c"])
+    cat
+
+    dummies = cat.to_dummies()
+    dummies
+
+    pd.Categorical.from_dummies(dummies)
+
+The :meth:`pandas.Categorical.from_dummies` class method accepts a dataframe
+whose dtypes are coercible to boolean, and an ``ordered`` argument
+for whether the resulting ``Categorical`` should be considered ordered
+(like the ``Categorical`` constructor).
+A column with a NA index will be ignored.
+Any row which is entirely falsey, or has a missing value,
+will be uncategorised.
+
+:meth:`pandas.Categorical.to_dummies` produces a boolean dataframe of dummy variables.
+If the ``na_column`` argument is ``None`` (default),
+missing items will result in a row of ``False``.
+Otherwise, the value of ``na_column`` will be used as the index
+of an extra column representing these items:
+
+.. ipython:: python
+
+    cat = pd.Categorical(["a", "b", np.nan])
+    cat.to_dummies(na_column="other")
+
+For more control over data types and column names,
+see :func:`pandas.get_dummies`.
+
+.. versionadded:: 1.1.0
 
 Controlling behavior
 ~~~~~~~~~~~~~~~~~~~~

From c2240b67451c3588df4ddf9caca32a2da6fabaa0 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Fri, 29 May 2020 11:21:53 +0100
Subject: [PATCH 09/47] Fix user guide errors

---
 doc/source/user_guide/categorical.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
index e5203ef8d3c3f..adf9d4f551d72 100644
--- a/doc/source/user_guide/categorical.rst
+++ b/doc/source/user_guide/categorical.rst
@@ -128,13 +128,13 @@ This conversion is likewise done column by column:
     df_cat['B']
 
 Dummy / indicator / one-hot encoded variables
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Some operations, like regression and classification,
 encodes a single categorical variable as a column for each category,
 with each row having False in all but one column (True).
 These are called dummy variables, or one-hot encoding.
-:class:`pandas.Categorical`s can easily be converted to and from such an encoding:
+:class:`pandas.Categorical` objects can easily be converted to and from such an encoding:
 
 .. ipython:: python
 

From 66771bfe0f4d85484d10e6be000468cae4ca01f1 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Fri, 29 May 2020 13:00:16 +0100
Subject: [PATCH 10/47] Fix numpy element from sequence error

---
 pandas/core/arrays/categorical.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 1e8f59b0797c6..fc16a96b710de 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -465,7 +465,7 @@ def to_dummies(self, na_column=None) -> "DataFrame":
         --------
         :func:`pandas.get_dummies`
         """
-        from pandas import DataFrame, CategoricalIndex
+        from pandas import DataFrame, CategoricalIndex, Series
 
         eye = np.eye(len(self.categories) + 1, dtype=bool)
         arr = eye[self.codes, :]
@@ -473,8 +473,8 @@ def to_dummies(self, na_column=None) -> "DataFrame":
         if na_column is None:
             return DataFrame(arr[:, :-1], columns=CategoricalIndex(self.categories))
         else:
-            cat_lst = list(self.categories) + [na_column]
-            return DataFrame(arr, columns=CategoricalIndex(cat_lst))
+            cats = CategoricalIndex(Series(list(self.categories) + [na_column]))
+            return DataFrame(arr, columns=cats)
 
     @property
     def dtype(self) -> CategoricalDtype:

From 4e769da521736e8c033dff801ff345a8fbed7995 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Fri, 29 May 2020 13:00:37 +0100
Subject: [PATCH 11/47] Test to_dummies column type cast

---
 pandas/tests/arrays/categorical/test_api.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py
index b65cabe735c5b..0122fd581f43d 100644
--- a/pandas/tests/arrays/categorical/test_api.py
+++ b/pandas/tests/arrays/categorical/test_api.py
@@ -410,9 +410,18 @@ def test_remove_unused_categories(self):
     )
     def test_to_dummies(self, vals):
         # GH 8745
-        cats = Categorical(vals)
+        cats = Categorical(Series(vals))
         tm.assert_equal(cats.to_dummies(), get_dummies(cats).astype(bool))
 
+    def test_to_dummies_na_dtype(self):
+        # when dtype of NA column name != dtype of categories,
+        # check the cast to object
+        # GH 8745
+        cats = Categorical([1, 2, 2, 1, np.nan])
+        assert cats.dtype != object
+        dummies = cats.to_dummies(na_column="other")
+        assert dummies.columns.categories.dtype == object
+
 
 class TestCategoricalAPIWithFactor(TestCategorical):
     def test_describe(self):

From fe002af13443a7d9e1b86ba492abbc9fe7254f26 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Fri, 29 May 2020 15:56:40 +0100
Subject: [PATCH 12/47] Test review comments

- Test Categorical.from_dummies from sparse
- Test that all NA-valued headers are dropped
- Test for informative error message
---
 .../tests/arrays/categorical/test_constructors.py  | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
index 44ee6eddae628..849e0819fe241 100644
--- a/pandas/tests/arrays/categorical/test_constructors.py
+++ b/pandas/tests/arrays/categorical/test_constructors.py
@@ -688,25 +688,27 @@ def test_interval(self):
         tm.assert_numpy_array_equal(cat.codes, expected_codes)
         tm.assert_index_equal(cat.categories, idx)
 
-    def test_from_dummies(self):
+    @pytest.mark.parametrize("sparse", [True, False])
+    def test_from_dummies(self, sparse):
         # GH 8745
         raw = ["a", "a", "b", "c", "c", "a"]
-        dummies = get_dummies(raw)
+        dummies = get_dummies(raw, sparse=sparse)
         cats = Categorical.from_dummies(dummies)
         assert list(cats) == raw
 
-    def test_from_dummies_nan(self):
+    @pytest.mark.parametrize("na_val", [np.nan, pd.NA, None, pd.NaT])
+    def test_from_dummies_nan(self, na_val):
         # GH 8745
-        raw = ["a", "a", "b", "c", "c", "a", np.nan]
+        raw = ["a", "a", "b", "c", "c", "a", na_val]
         dummies = get_dummies(raw)
         cats = Categorical.from_dummies(dummies)
         assert list(cats)[:-1] == raw[:-1]
         assert pd.isna(list(cats)[-1])
 
-    def test_from_dummies_gt1(self):
+    def test_from_dummies_multiple(self):
         # GH 8745
         dummies = DataFrame([[1, 0, 1], [0, 1, 0], [0, 0, 1]], columns=["a", "b", "c"])
-        with pytest.raises(ValueError):
+        with pytest.raises(ValueError, match="multiple categories"):
             Categorical.from_dummies(dummies)
 
     @pytest.mark.parametrize("ordered", [None, False, True])

From 097f2c6bcf9e46596560b1f2ddb75e06d7dbc380 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Fri, 29 May 2020 15:58:06 +0100
Subject: [PATCH 13/47] Review comments for implementation

- Handle more NA types
- More examples
- More informative error message
---
 pandas/core/arrays/categorical.py | 54 +++++++++++++++++++++++--------
 1 file changed, 41 insertions(+), 13 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index fc16a96b710de..3219e1c8762bb 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -379,15 +379,15 @@ def from_dummies(
     ) -> "Categorical":
         """Create a `Categorical` using a ``DataFrame`` of dummy variables.
 
-        The ``DataFrame`` must be coercible to boolean,
-        and have no more than one truthy value per row.
+        The ``DataFrame`` must have no more than one truthy value per row.
         The columns of the ``DataFrame`` become the categories of the `Categorical`.
-        A column whose header is NA will be dropped;
-        any row with a NA value will be uncategorised.
+        A column whose header is NA will be dropped;
+        any row containing a NA value will be uncategorised.
 
         Parameters
         ----------
-        dummies : DataFrame of bool-like
+        dummies : DataFrame
+            dtypes of columns with non-NA headers must be coercible to bool.
         ordered : bool
             Whether or not this Categorical is ordered.
 
@@ -402,17 +402,45 @@ def from_dummies(
 
         Examples
         --------
-        >>> df = pd.DataFrame(
-        ... [[1, 0, 0], [0, 1, 0], [0, 0, 1]], columns=["a", "b", "c"]
-        ... )
-        >>> Categorical.from_dummies(df)
+        >>> simple = pd.DataFrame(np.eye(3), columns=["a", "b", "c"])
+        >>> Categorical.from_dummies(simple)
         [a, b, c]
         Categories (3, object): [a, b, c]
-        """
-        df = dummies.drop(columns=np.nan, errors="ignore").astype(bool)
 
-        if (df.sum(axis=1) > 1).any():
-            raise ValueError("Some rows belong to >1 category")
+        >>> nan_col = pd.DataFrame(np.eye(4), columns=["a", "b", np.nan, None])
+        >>> Categorical.from_dummies(nan_col)
+        [a, b, NaN, NaN]
+        Categories (2, object): [a, b]
+
+        >>> nan_cell = pd.DataFrame(
+        ...     [[1, 0, np.nan], [0, 1, 0], [0, 0, 1]],
+        ...     columns=["a", "b", "c"],
+        ... )
+        >>> Categorical.from_dummies(nan_cell)
+        [NaN, b, c]
+        Categories (3, object): [a, b, c]
+
+        >>> multi = pd.DataFrame(
+        ...     [[1, 0, 1], [0, 1, 0], [0, 0, 1]],
+        ...     columns=["a", "b", "c"],
+        ... )
+        >>> Categorical.from_dummies(multi)
+        Traceback (most recent call last):
+            ...
+        ValueError: 1 record(s) belongs to multiple categories: [0]
+        """
+        to_drop = dummies.columns[dummies.columns.isna()]
+        if len(to_drop):
+            dummies = dummies.drop(columns=to_drop)
+        df = dummies.astype(bool)
+
+        multicat_rows = df.sum(axis=1) > 1
+        if multicat_rows.any():
+            raise ValueError(
+                "{} record(s) belongs to multiple categories: {}".format(
+                    multicat_rows.sum(), list(df.index[multicat_rows]),
+                )
+            )
 
         mult_by = np.arange(df.shape[1]) + 1
         #  000            000    0   -1

From afe8eda71f7c9ddff9d68ad85dfeef8ddbe0b901 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Fri, 29 May 2020 16:28:35 +0100
Subject: [PATCH 14/47] Fix doctest for missing values

---
 pandas/core/arrays/categorical.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 3219e1c8762bb..307f1413e7d5e 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -432,9 +432,9 @@ def from_dummies(
         to_drop = dummies.columns[dummies.columns.isna()]
         if len(to_drop):
             dummies = dummies.drop(columns=to_drop)
-        df = dummies.astype(bool)
+        df = dummies.astype("boolean")
 
-        multicat_rows = df.sum(axis=1) > 1
+        multicat_rows = df.sum(axis=1, skipna=False) > 1
         if multicat_rows.any():
             raise ValueError(
                 "{} record(s) belongs to multiple categories: {}".format(

From e78158ea4aec01ebdbcf6314b6e2430860cb135c Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Fri, 29 May 2020 16:33:29 +0100
Subject: [PATCH 15/47] xfail for Categorical from sparse

---
 pandas/tests/arrays/categorical/test_constructors.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
index 849e0819fe241..0b1f7819d9505 100644
--- a/pandas/tests/arrays/categorical/test_constructors.py
+++ b/pandas/tests/arrays/categorical/test_constructors.py
@@ -690,6 +690,8 @@ def test_interval(self):
 
     @pytest.mark.parametrize("sparse", [True, False])
     def test_from_dummies(self, sparse):
+        if sparse:
+            pytest.xfail("from sparse is not supported")
         # GH 8745
         raw = ["a", "a", "b", "c", "c", "a"]
         dummies = get_dummies(raw, sparse=sparse)

From 4fb1e5ea1d90207bd7ee7265a7d8ff6c93ecc2cf Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Fri, 29 May 2020 16:33:51 +0100
Subject: [PATCH 16/47] Fix tests

---
 pandas/core/arrays/categorical.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 307f1413e7d5e..57ffdf560426f 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -388,6 +388,7 @@ def from_dummies(
         ----------
         dummies : DataFrame
             dtypes of columns with non-NA headers must be coercible to bool.
+            Sparse dataframes are not supported.
         ordered : bool
             Whether or not this Categorical is ordered.
 
@@ -429,7 +430,7 @@ def from_dummies(
             ...
         ValueError: 1 record(s) belongs to multiple categories: [0]
         """
-        to_drop = dummies.columns[dummies.columns.isna()]
+        to_drop = dummies.columns[dummies.columns.values.isna()]
         if len(to_drop):
             dummies = dummies.drop(columns=to_drop)
         df = dummies.astype("boolean")

From 9fa549411b24b9e3e722b0d1b09230cfd0ff6775 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Fri, 29 May 2020 17:15:47 +0100
Subject: [PATCH 17/47] fix isna

---
 pandas/core/arrays/categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 57ffdf560426f..1de927eada9ca 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -430,7 +430,7 @@ def from_dummies(
             ...
         ValueError: 1 record(s) belongs to multiple categories: [0]
         """
-        to_drop = dummies.columns[dummies.columns.values.isna()]
+        to_drop = dummies.columns[isna(dummies.columns.values)]
         if len(to_drop):
             dummies = dummies.drop(columns=to_drop)
         df = dummies.astype("boolean")

From 61567fd24a3807d154c5622298d01296d312c30d Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Fri, 29 May 2020 17:44:49 +0100
Subject: [PATCH 18/47] Explicit integer type cast

---
 pandas/core/arrays/categorical.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 1de927eada9ca..bd19c3f629882 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -448,9 +448,8 @@ def from_dummies(
         #  010            020    2    1
         #  001 * 1,2,3 => 003 -> 3 -> 2 = correct codes
         #  100            100    1    0
-        codes = (df * mult_by).sum(axis=1) - 1
-        codes[codes.isna()] = -1
-        return cls.from_codes(codes, df.columns.values, ordered=ordered)
+        codes = ((df * mult_by).sum(axis=1) - 1).astype("Int64")
+        return cls.from_codes(codes.fillna(-1), df.columns.values, ordered=ordered)
 
     def to_dummies(self, na_column=None) -> "DataFrame":
         """

From 5d724ccc85d771c4f30c47d018f737b4a353b5f2 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Fri, 29 May 2020 17:46:14 +0100
Subject: [PATCH 19/47] Test categorical <-> dummies roundtrip

---
 pandas/tests/arrays/categorical/test_api.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py
index 0122fd581f43d..56aa03ee82a3e 100644
--- a/pandas/tests/arrays/categorical/test_api.py
+++ b/pandas/tests/arrays/categorical/test_api.py
@@ -422,6 +422,22 @@ def test_to_dummies_na_dtype(self):
         dummies = cats.to_dummies(na_column="other")
         assert dummies.columns.categories.dtype == object
 
+    @pytest.mark.parametrize(
+        "vals",
+        [
+            ["a", "b", "b", "a"],
+            ["a", "b", "b", "a", np.nan],
+            [1, 1.5, "a", (1, "b")],
+            [1, 1.5, "a", (1, "b"), np.nan],
+        ],
+    )
+    def test_dummies_roundtrip(self, vals):
+        # GH 8745
+        cats = Categorical(Series(vals))
+        dummies = cats.to_dummies()
+        cats2 = Categorical.from_dummies(dummies)
+        tm.assert_equal(cats, cats2)
+
 
 class TestCategoricalAPIWithFactor(TestCategorical):
     def test_describe(self):

From 1182ce52933e63df66b52d1b91c13ec8c46a8919 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Fri, 29 May 2020 18:22:13 +0100
Subject: [PATCH 20/47] more type casts

---
 pandas/core/arrays/categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index bd19c3f629882..9704a990400d2 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -448,7 +448,7 @@ def from_dummies(
         #  010            020    2    1
         #  001 * 1,2,3 => 003 -> 3 -> 2 = correct codes
         #  100            100    1    0
-        codes = ((df * mult_by).sum(axis=1) - 1).astype("Int64")
+        codes = ((df * mult_by).sum(axis=1, skipna=False) - 1).astype("Int64")
         return cls.from_codes(codes.fillna(-1), df.columns.values, ordered=ordered)
 
     def to_dummies(self, na_column=None) -> "DataFrame":

From 04ca72a3f423390fc2ea69e6e578a48dc3105dd4 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 10:59:12 +0100
Subject: [PATCH 21/47] Add wiki link for dummy variables

---
 doc/source/user_guide/categorical.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
index adf9d4f551d72..fe5fc3d734fe7 100644
--- a/doc/source/user_guide/categorical.rst
+++ b/doc/source/user_guide/categorical.rst
@@ -133,7 +133,7 @@ Dummy / indicator / one-hot encoded variables
 Some operations, like regression and classification,
 encodes a single categorical variable as a column for each category,
 with each row having False in all but one column (True).
-These are called dummy variables, or one-hot encoding.
+These are called `dummy variables <https://en.wikipedia.org/wiki/Dummy_variable_(statistics)>`_, or one-hot encoding.
 :class:`pandas.Categorical` objects can easily be converted to and from such an encoding:
 
 .. ipython:: python

From 6e4f71adafa5bbf3dfbffdde4e988707cc3405c2 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 10:59:47 +0100
Subject: [PATCH 22/47] Remove deprecated numpy <v1.16 check

---
 pandas/tests/arrays/categorical/test_constructors.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
index 0b1f7819d9505..b4c3fe55133ae 100644
--- a/pandas/tests/arrays/categorical/test_constructors.py
+++ b/pandas/tests/arrays/categorical/test_constructors.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 
-from pandas.compat.numpy import _np_version_under1p16
-
 from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype
 from pandas.core.dtypes.dtypes import CategoricalDtype
 
@@ -639,7 +637,6 @@ def test_constructor_imaginary(self):
         tm.assert_index_equal(c1.categories, Index(values))
         tm.assert_numpy_array_equal(np.array(c1), np.array(values))
 
-    @pytest.mark.skipif(_np_version_under1p16, reason="Skipping for NumPy <1.16")
     def test_constructor_string_and_tuples(self):
         # GH 21416
         c = pd.Categorical(np.array(["c", ("a", "b"), ("b", "a"), "c"], dtype=object))

From a761baf6e5d06e944dfce1a31c7ecc20c8f8a298 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 11:12:47 +0100
Subject: [PATCH 23/47] isort fix

---
 pandas/tests/series/indexing/test_datetime.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py
index 088f8681feb99..19d5dfa3b3900 100644
--- a/pandas/tests/series/indexing/test_datetime.py
+++ b/pandas/tests/series/indexing/test_datetime.py
@@ -11,6 +11,7 @@
 from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range
 import pandas._testing as tm
 
+
 """
 Also test support for datetime64[ns] in Series / DataFrame
 """

From ed58c7799a423e60888a22ef0d1378807a897f4a Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 12:04:06 +0100
Subject: [PATCH 24/47] undo changes to whatsnew v1.1.0

---
 doc/source/whatsnew/v1.1.0.rst | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index f2df6579ced70..a49b29d691692 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -318,10 +318,29 @@ Other enhancements
   compression library. Compression was also added to the low-level Stata-file writers
   :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`,
   and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`26599`).
-- :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`).
-- Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable（:issue:`11704`)
-- Make ``option_context`` a :class:`contextlib.ContextDecorator`, which allows it to be used as a decorator over an entire function (:issue:`34253`).
-- :class:`~pandas.core.arrays.categorical.Categorical` now has methods for converting to and from dummy/ one-hot encoded variables: :meth:`Categorical.to_dummies` and :meth:`Categorical.from_dummies` respectively. :meth:`Categorical.to_dummies` is smaller in scope than :func:`~pandas.core.reshape.reshape.get_dummies`, which can still be used if you require the extra flexibility.
+- :meth:`HDFStore.put` now accepts a ``track_times`` parameter. This parameter is passed to the ``create_table`` method of ``PyTables`` (:issue:`32682`).
+- :meth:`Series.plot` and :meth:`DataFrame.plot` now accepts `xlabel` and `ylabel` parameters to present labels on x and y axis (:issue:`9093`).
+- Made :class:`pandas.core.window.rolling.Rolling` and :class:`pandas.core.window.expanding.Expanding` iterable（:issue:`11704`)
+- Made ``option_context`` a :class:`contextlib.ContextDecorator`, which allows it to be used as a decorator over an entire function (:issue:`34253`).
+- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now accept an ``errors`` argument (:issue:`22610`)
+- :meth:`~pandas.core.groupby.DataFrameGroupBy.groupby.transform` now allows ``func`` to be ``pad``, ``backfill`` and ``cumcount`` (:issue:`31269`).
+- :func:`read_json` now accepts an ``nrows`` parameter. (:issue:`33916`).
+- :meth:`DataFrame.hist`, :meth:`Series.hist`, :meth:`core.groupby.DataFrameGroupBy.hist`, and :meth:`core.groupby.SeriesGroupBy.hist` have gained the ``legend`` argument. Set to True to show a legend in the histogram. (:issue:`6279`)
+- :func:`concat` and :meth:`~DataFrame.append` now preserve extension dtypes, for example
+  combining a nullable integer column with a numpy integer column will no longer
+  result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`, :issue:`34095`).
+- :func:`read_gbq` now allows to disable progress bar (:issue:`33360`).
+- :func:`read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`).
+- :meth:`DataFrame.cov` and :meth:`Series.cov` now support a new parameter ``ddof`` to support delta degrees of freedom as in the corresponding numpy methods (:issue:`34611`).
+- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`).
+- :meth:`DataFrame.to_excel` can now also write OpenOffice spreadsheet (.ods) files (:issue:`27222`)
+- :meth:`~Series.explode` now accepts ``ignore_index`` to reset the index, similar to :meth:`pd.concat` or :meth:`DataFrame.sort_values` (:issue:`34932`).
+- :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` now accept ``index`` argument as an alias for tabulate's ``showindex`` (:issue:`32667`)
+- :meth:`read_csv` now accepts string values like "0", "0.0", "1", "1.0" as convertible to the nullable Boolean dtype (:issue:`34859`)
+- :class:`pandas.core.window.ExponentialMovingWindow` now supports a ``times`` argument that allows ``mean`` to be calculated with observations spaced by the timestamps in ``times`` (:issue:`34839`)
+- :meth:`DataFrame.agg` and :meth:`Series.agg` now accept named aggregation for renaming the output columns/indexes. (:issue:`26513`)
+- ``compute.use_numba`` now exists as a configuration option that utilizes the numba engine when available (:issue:`33966`, :issue:`35374`)
+- :meth:`Series.plot` now supports asymmetric error bars. Previously, if :meth:`Series.plot` received a "2xN" array with error values for `yerr` and/or `xerr`, the left/lower values (first row) were mirrored, while the right/upper values (second row) were ignored. Now, the first row represents the left/lower error values and the second row the right/upper error values. (:issue:`9536`)
 
 .. ---------------------------------------------------------------------------
 

From 741cf8f4f4006743ddcd8c2bc2c590c84ef8811c Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 12:04:46 +0100
Subject: [PATCH 25/47] whatsnew/v1.2.0: Categorical get/from dummies

---
 doc/source/whatsnew/v1.2.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 6a5b4b3b9ff16..c3ac951eb51b1 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -120,6 +120,7 @@ Other enhancements
 - `Styler` now allows direct CSS class name addition to individual data cells (:issue:`36159`)
 - :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`)
 - :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`)
+- :meth:`Categorical.from_dummies` and :meth:`Categorical.get_dummies` convert between :class:`Categorical` and :class:`DataFrame` objects of dummy variables (:issue:`8745`).
 
 .. _whatsnew_120.api_breaking.python:
 

From 6f199b6013a8ed79e07f271ccfbc277c4f04e06e Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 12:05:44 +0100
Subject: [PATCH 26/47] Update user_guide/categorical docs

---
 doc/source/user_guide/categorical.rst | 32 +++++++++------------------
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
index fe5fc3d734fe7..892c20bc64c51 100644
--- a/doc/source/user_guide/categorical.rst
+++ b/doc/source/user_guide/categorical.rst
@@ -134,17 +134,7 @@ Some operations, like regression and classification,
 encodes a single categorical variable as a column for each category,
 with each row having False in all but one column (True).
 These are called `dummy variables <https://en.wikipedia.org/wiki/Dummy_variable_(statistics)>`_, or one-hot encoding.
-:class:`pandas.Categorical` objects can easily be converted to and from such an encoding:
-
-.. ipython:: python
-
-    cat = pd.Categorical(["a", "b", "b", "c"])
-    cat
-
-    dummies = cat.to_dummies()
-    dummies
-
-    pd.Categorical.from_dummies(dummies)
+:class:`pandas.Categorical` objects can easily be converted to and from such an encoding.
 
 The :meth:`pandas.Categorical.from_dummies` class method accepts a dataframe
 whose dtypes are coercible to boolean, and an ``ordered`` argument
@@ -154,21 +144,21 @@ A column with a NA index will be ignored.
 Any row which is entirely falsey, or has a missing value,
 will be uncategorised.
 
-:meth:`pandas.Categorical.to_dummies` produces a boolean dataframe of dummy variables.
-If the ``na_column`` argument is ``None`` (default),
-missing items will result in a row of ``False``.
-Otherwise, the value of ``na_column`` will be used as the index
-of an extra column representing these items:
+:meth:`pandas.Categorical.get_dummies` produces a dataframe of dummy variables.
+It works in the same way and supports most of the same arguments as :func:`pandas.get_dummies`.
 
 .. ipython:: python
 
-    cat = pd.Categorical(["a", "b", np.nan])
-    cat.to_dummies(na_column="other")
+    cat = pd.Categorical(["a", "b", "b", "c"])
+    cat
+
+    dummies = cat.get_dummies()
+    dummies
+
+    pd.Categorical.from_dummies(dummies)
 
-For more control over data types and column names,
-see :func:`pandas.get_dummies`.
 
-.. versionadded:: 1.1.0
+.. versionadded:: 1.2.0
 
 Controlling behavior
 ~~~~~~~~~~~~~~~~~~~~

From 034f8e1efc15c6974ade669248ffaf5d008b9479 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 12:06:09 +0100
Subject: [PATCH 27/47] Reference Categorical.get_dummies in reshape.py

---
 pandas/core/reshape/reshape.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index e848f968b64e4..be5a39a9f90d4 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -768,7 +768,7 @@ def get_dummies(
     See Also
     --------
     Series.str.get_dummies : Convert Series to dummy codes.
-    Categorical.to_dummies : Simply create dummy variables from a Categorical.
+    Categorical.get_dummies : Convert a Categorical array to dummy codes.
 
     Examples
     --------

From b80f089978ed92aba023f77ddcd3ab3a4a88a432 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 12:06:59 +0100
Subject: [PATCH 28/47] Categorical->dummies more like get_dummies

---
 pandas/core/arrays/categorical.py | 127 ++++++++++++++++++++----------
 1 file changed, 86 insertions(+), 41 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 9704a990400d2..4db55ee1871a8 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -451,58 +451,103 @@ def from_dummies(
         codes = ((df * mult_by).sum(axis=1, skipna=False) - 1).astype("Int64")
         return cls.from_codes(codes.fillna(-1), df.columns.values, ordered=ordered)
 
-    def to_dummies(self, na_column=None) -> "DataFrame":
-        """
-        Create a ``DataFrame`` of boolean dummy variables representing this object.
-
-        For more power over column names or to use a sparse matrix,
-        see :func:`pandas.get_dummies`.
+    def get_dummies(
+        self,
+        prefix=None,
+        prefix_sep="_",
+        dummy_na=False,
+        sparse=False,
+        drop_first=False,
+        dtype=None,
+    ) -> "DataFrame":
+        """
+        Convert into dummy/indicator variables.
 
         Parameters
         ----------
-        na_column : Optional
-            If None, NA values will be represented as a row of zeros.
-            Otherwise, this is the name of a new column representing
-            those NA values.
+        prefix : str, default None
+            String to prepend to the DataFrame column names.
+        prefix_sep : str, default '_'
+            If appending prefix, separator/delimiter to use.
+        dummy_na : bool, default False
+            Add a column to indicate NaNs, if False NaNs are ignored.
+        sparse : bool, default False
+            Whether the dummy-encoded columns should be backed by
+            a :class:`SparseArray` (True) or a regular NumPy array (False).
+        drop_first : bool, default False
+            Whether to get k-1 dummies out of k categorical levels by removing the
+            first level.
+        dtype : dtype, default np.uint8
+            Data type for new columns. Only a single dtype is allowed.
 
         Returns
         -------
         DataFrame
-
-        Examples
-        --------
-        >>> Categorical(["a", "b", "c"]).to_dummies()
-           a      b      c
-        0  True   False  False
-        1  False  True   False
-        2  False  False  True
-
-        >>> Categorical(["a", "b", np.nan]).to_dummies()
-           a      b
-        0  True   False
-        1  False  True
-        2  False  False
-
-        >>> Categorical(["a", "b", np.nan]).to_dummies("other")
-           a      b      other
-        0  True   False  False
-        1  False  True   False
-        2  False  False  True
+            Dummy-coded data.
 
         See Also
         --------
-        :func:`pandas.get_dummies`
-        """
-        from pandas import DataFrame, CategoricalIndex, Series
+        Series.str.get_dummies : Convert Series to dummy codes.
+        pandas.get_dummies : Convert categorical variable to dummy/indicator variables.
 
-        eye = np.eye(len(self.categories) + 1, dtype=bool)
-        arr = eye[self.codes, :]
-
-        if na_column is None:
-            return DataFrame(arr[:, :-1], columns=CategoricalIndex(self.categories))
-        else:
-            cats = CategoricalIndex(Series(list(self.categories) + [na_column]))
-            return DataFrame(arr, columns=cats)
+        Examples
+        --------
+        >>> s = pd.Categorical(list('abca'))
+
+        >>> s.get_dummies()
+        a  b  c
+        0  1  0  0
+        1  0  1  0
+        2  0  0  1
+        3  1  0  0
+
+        >>> s1 = pd.Categorical(['a', 'b', np.nan])
+
+        >>> s1.get_dummies()
+        a  b
+        0  1  0
+        1  0  1
+        2  0  0
+
+        >>> s1.get_dummies(dummy_na=True)
+        a  b  NaN
+        0  1  0    0
+        1  0  1    0
+        2  0  0    1
+
+        >>> pd.Categorical(list('abcaa)).get_dummies()
+        a  b  c
+        0  1  0  0
+        1  0  1  0
+        2  0  0  1
+        3  1  0  0
+        4  1  0  0
+
+        >>> pd.Categorical(list('abcaa)).get_dummies(drop_first=True)
+        b  c
+        0  0  0
+        1  1  0
+        2  0  1
+        3  0  0
+        4  0  0
+
+        >>> pd.Categorical(list('abc')).get_dummies(dtype=float)
+            a    b    c
+        0  1.0  0.0  0.0
+        1  0.0  1.0  0.0
+        2  0.0  0.0  1.0
+        """
+        from pandas import _get_dummies_1d
+
+        return _get_dummies_1d(
+            self,
+            prefix=prefix,
+            prefix_sep=prefix_sep,
+            dummy_na=dummy_na,
+            sparse=sparse,
+            drop_first=drop_first,
+            dtype=dtype,
+        )
 
     @property
     def dtype(self) -> CategoricalDtype:

From 0eb936f1c04813a0c5573e88b68969ea07f7d07d Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 12:07:19 +0100
Subject: [PATCH 29/47] categorical tests

---
 pandas/tests/arrays/categorical/test_api.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py
index 56aa03ee82a3e..7fcb568c665ed 100644
--- a/pandas/tests/arrays/categorical/test_api.py
+++ b/pandas/tests/arrays/categorical/test_api.py
@@ -408,19 +408,10 @@ def test_remove_unused_categories(self):
             [1, 1.5, "a", (1, "b"), np.nan],
         ],
     )
-    def test_to_dummies(self, vals):
+    def test_get_dummies(self, vals):
         # GH 8745
         cats = Categorical(Series(vals))
-        tm.assert_equal(cats.to_dummies(), get_dummies(cats).astype(bool))
-
-    def test_to_dummies_na_dtype(self):
-        # when dtype of NA column name != dtype of categories,
-        # check the cast to object
-        # GH 8745
-        cats = Categorical([1, 2, 2, 1, np.nan])
-        assert cats.dtype != object
-        dummies = cats.to_dummies(na_column="other")
-        assert dummies.columns.categories.dtype == object
+        tm.assert_equal(cats.get_dummies(), get_dummies(cats))
 
     @pytest.mark.parametrize(
         "vals",

From 8f212e1d999937538ae7043a639a62a7f0aca13f Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 12:08:05 +0100
Subject: [PATCH 30/47] isort pandas_web

---
 web/pandas_web.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/web/pandas_web.py b/web/pandas_web.py
index 7dd63175e69ac..e62deaa8cdc7f 100755
--- a/web/pandas_web.py
+++ b/web/pandas_web.py
@@ -34,12 +34,13 @@
 import time
 import typing
 
-import feedparser
 import jinja2
-import markdown
 import requests
 import yaml
 
+import feedparser
+import markdown
+
 
 class Preprocessors:
     """

From bda526512be6c16cececfb00eff4f1912795bde4 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 13:24:45 +0100
Subject: [PATCH 31/47] fix _get_dummies_1d import path

---
 pandas/core/arrays/categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 4db55ee1871a8..3a241fca60eb2 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -537,7 +537,7 @@ def get_dummies(
         1  0.0  1.0  0.0
         2  0.0  0.0  1.0
         """
-        from pandas import _get_dummies_1d
+        from pandas.core.reshape.reshape import _get_dummies_1d
 
         return _get_dummies_1d(
             self,

From 6e6ddda82e0c79dd3afaa201e88d10b1ae6de118 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 13:25:38 +0100
Subject: [PATCH 32/47] categorical.test_api: to->get dummies

---
 pandas/tests/arrays/categorical/test_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py
index 7fcb568c665ed..d47841618d6f0 100644
--- a/pandas/tests/arrays/categorical/test_api.py
+++ b/pandas/tests/arrays/categorical/test_api.py
@@ -425,7 +425,7 @@ def test_get_dummies(self, vals):
     def test_dummies_roundtrip(self, vals):
         # GH 8745
         cats = Categorical(Series(vals))
-        dummies = cats.to_dummies()
+        dummies = cats.get_dummies()
         cats2 = Categorical.from_dummies(dummies)
         tm.assert_equal(cats, cats2)
 

From 9fcebf066d487fabffda6a20afa9027fe17c9bc3 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 13:37:57 +0100
Subject: [PATCH 33/47] isort pandas_web

---
 web/pandas_web.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/web/pandas_web.py b/web/pandas_web.py
index e62deaa8cdc7f..7dd63175e69ac 100755
--- a/web/pandas_web.py
+++ b/web/pandas_web.py
@@ -34,13 +34,12 @@
 import time
 import typing
 
+import feedparser
 import jinja2
+import markdown
 import requests
 import yaml
 
-import feedparser
-import markdown
-
 
 class Preprocessors:
     """

From b9908c419f9e8d6c1f88ef4ce59097c145ccb4da Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 13:51:34 +0100
Subject: [PATCH 34/47] fix typos in categorical doctests

---
 pandas/core/arrays/categorical.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 3a241fca60eb2..cf8b6c00e5a53 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -515,7 +515,7 @@ def get_dummies(
         1  0  1    0
         2  0  0    1
 
-        >>> pd.Categorical(list('abcaa)).get_dummies()
+        >>> pd.Categorical(list('abcaa')).get_dummies()
         a  b  c
         0  1  0  0
         1  0  1  0
@@ -523,7 +523,7 @@ def get_dummies(
         3  1  0  0
         4  1  0  0
 
-        >>> pd.Categorical(list('abcaa)).get_dummies(drop_first=True)
+        >>> pd.Categorical(list('abcaa')).get_dummies(drop_first=True)
         b  c
         0  0  0
         1  1  0

From faeec4170e0de9fdb344d905c0d55ec45bea1905 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 14:06:11 +0100
Subject: [PATCH 35/47] isort test_datetime

---
 pandas/tests/series/indexing/test_datetime.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py
index 19d5dfa3b3900..088f8681feb99 100644
--- a/pandas/tests/series/indexing/test_datetime.py
+++ b/pandas/tests/series/indexing/test_datetime.py
@@ -11,7 +11,6 @@
 from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range
 import pandas._testing as tm
 
-
 """
 Also test support for datetime64[ns] in Series / DataFrame
 """

From e11f28e0f2b6c52c29f7121600e527d64a6bd856 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 14:06:21 +0100
Subject: [PATCH 36/47] use get_dummies instead of _get_dummies_1d

---
 pandas/core/arrays/categorical.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index cf8b6c00e5a53..aaf34006a9657 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -537,9 +537,11 @@ def get_dummies(
         1  0.0  1.0  0.0
         2  0.0  0.0  1.0
         """
-        from pandas.core.reshape.reshape import _get_dummies_1d
+        # Would be better to use pandas.core.reshape.reshape._get_dummies_1d
+        # but that's internal and fails lints
+        from pandas import get_dummies
 
-        return _get_dummies_1d(
+        return get_dummies(
             self,
             prefix=prefix,
             prefix_sep=prefix_sep,

From 742c940dd87c5979eadee429e2036a293177760b Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 14:34:48 +0100
Subject: [PATCH 37/47] Reference get_dummies/ from_dummies in reshaping docs

---
 doc/source/user_guide/reshaping.rst | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst
index 1b90aeb00cf9c..a666bbd885baf 100644
--- a/doc/source/user_guide/reshaping.rst
+++ b/doc/source/user_guide/reshaping.rst
@@ -606,7 +606,7 @@ This function is often used along with discretization functions like ``cut``:
 
    pd.get_dummies(pd.cut(values, bins))
 
-See also :func:`Series.str.get_dummies <pandas.Series.str.get_dummies>`.
+See also :func:`Series.str.get_dummies <pandas.Series.str.get_dummies>` and :meth:`Categorical.get_dummies <pandas.Categorical.get_dummies>`.
 
 :func:`get_dummies` also accepts a ``DataFrame``. By default all categorical
 variables (categorical in the statistical sense, those with `object` or
@@ -679,6 +679,15 @@ To choose another dtype, use the ``dtype`` argument:
 
     pd.get_dummies(df, dtype=bool).dtypes
 
+A :class:`~pandas.Categorical` can be recovered from a :class:`~pandas.DataFrame` of such dummy variables using :meth:`~pandas.Categorical.from_dummies`.
+Use the ``prefix`` and ``prefix_sep`` arguments to select and rename columns which have had a prefix applied in the same way as :func:`~pandas.get_dummies` does.
+
+.. ipython:: python
+
+    df = pd.get_dummies(list("abca"))
+
+    pd.Categorical.from_dummies(df)
+
 
 .. _reshaping.factorize:
 

From 722137d374639fbd5fc14c1b12b39b92a74f1640 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 14:35:05 +0100
Subject: [PATCH 38/47] use prefix in from_dummies

---
 pandas/core/arrays/categorical.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index aaf34006a9657..6546b07b3c834 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -375,10 +375,17 @@ def __init__(
 
     @classmethod
     def from_dummies(
-        cls, dummies: "DataFrame", ordered: Optional[bool] = None
+        cls,
+        dummies: "DataFrame",
+        ordered: Optional[bool] = None,
+        prefix=None,
+        prefix_sep="_",
     ) -> "Categorical":
         """Create a `Categorical` using a ``DataFrame`` of dummy variables.
 
+        Can use a subset of columns based on the ``prefix``
+        and ``prefix_sep`` parameters.
+
         The ``DataFrame`` must have no more than one truthy value per row.
         The columns of the ``DataFrame`` become the categories of the `Categorical`.
         A column whose header is NA will be dropped:
@@ -391,6 +398,13 @@ def from_dummies(
             Sparse dataframes are not supported.
         ordered : bool
             Whether or not this Categorical is ordered.
+        prefix : optional str
+            Only take columns whose names are strings starting
+            with this prefix and ``prefix_sep``,
+            stripping those elements from the resulting category names.
+        prefix_sep : str, default "_"
+            If ``prefix`` is not ``None``, use as the separator
+            between the prefix and the final name of the category.
 
         Raises
         ------
@@ -433,6 +447,17 @@ def from_dummies(
         to_drop = dummies.columns[isna(dummies.columns.values)]
         if len(to_drop):
             dummies = dummies.drop(columns=to_drop)
+
+        if prefix is not None:
+            pref = prefix + (prefix_sep or "")
+            name_map = dict()
+            to_keep = []
+            for c in dummies.columns:
+                if isinstance(c, str) and c.startswith(pref):
+                    to_keep.append(c)
+                    name_map[c] = c[len(pref) :]
+            dummies = dummies[to_keep].rename(columns=name_map)
+
         df = dummies.astype("boolean")
 
         multicat_rows = df.sum(axis=1, skipna=False) > 1

From 4945ba8722d3e7ea365253cd43172538c17a73ba Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 14:53:47 +0100
Subject: [PATCH 39/47] document prefix handling in categorical.rst

---
 doc/source/user_guide/categorical.rst | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
index 892c20bc64c51..c8bdc2394ddf9 100644
--- a/doc/source/user_guide/categorical.rst
+++ b/doc/source/user_guide/categorical.rst
@@ -136,6 +136,16 @@ with each row having False in all but one column (True).
 These are called `dummy variables <https://en.wikipedia.org/wiki/Dummy_variable_(statistics)>`_, or one-hot encoding.
 :class:`pandas.Categorical` objects can easily be converted to and from such an encoding.
 
+:meth:`pandas.Categorical.get_dummies` produces a dataframe of dummy variables.
+It works in the same way and supports most of the same arguments as :func:`pandas.get_dummies`.
+
+.. ipython:: python
+
+    cat = pd.Categorical(["a", "b", "b", "c"])
+    cat
+
+    cat.get_dummies()
+
 The :meth:`pandas.Categorical.from_dummies` class method accepts a dataframe
 whose dtypes are coercible to boolean, and an ``ordered`` argument
 for whether the resulting ``Categorical`` should be considered ordered
@@ -143,19 +153,16 @@ for whether the resulting ``Categorical`` should be considered ordered
 A column with a NA index will be ignored.
 Any row which is entirely falsey, or has a missing value,
 will be uncategorised.
-
-:meth:`pandas.Categorical.get_dummies` produces a dataframe of dummy variables.
-It works in the same way and supports most of the same arguments as :func:`pandas.get_dummies`.
+In the same way that :func:`pandas.get_dummies` can add a prefix to string category names,
+:meth:`~pandas.Categorical.from_dummies` can filter a dataframe for columns with a prefix:
+the resulting ``Categorical`` will have the prefix stripped from its categories.
 
 .. ipython:: python
 
-    cat = pd.Categorical(["a", "b", "b", "c"])
-    cat
-
-    dummies = cat.get_dummies()
+    dummies = pd.get_dummies(["a", "b", "b", "c"], prefix="cat")
     dummies
 
-    pd.Categorical.from_dummies(dummies)
+    pd.Categorical.from_dummies(dummies, prefix="cat")
 
 
 .. versionadded:: 1.2.0

From 1f98233ba0dac0b6558ddc20cfd281a16f853165 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Thu, 17 Sep 2020 16:12:06 +0100
Subject: [PATCH 40/47] Lower-memory impl for Categorical.from_dummies

---
 pandas/core/arrays/categorical.py | 39 +++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 6546b07b3c834..cd0ccc2cdce76 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -380,6 +380,7 @@ def from_dummies(
         ordered: Optional[bool] = None,
         prefix=None,
         prefix_sep="_",
+        fillna=None,
     ) -> "Categorical":
         """Create a `Categorical` using a ``DataFrame`` of dummy variables.
 
@@ -405,6 +406,9 @@ def from_dummies(
         prefix_sep : str, default "_"
             If ``prefix`` is not ``None``, use as the separator
             between the prefix and the final name of the category.
+        fillna : optional bool, default None
+            How to handle NA values. If ``True`` or ``False``, NA is filled with that value.
+            If ``None``, raise a ValueError if there are any NA values.
 
         Raises
         ------
@@ -444,23 +448,35 @@ def from_dummies(
             ...
         ValueError: 1 record(s) belongs to multiple categories: [0]
         """
+        from pandas import Series
+
+        copied = False
         to_drop = dummies.columns[isna(dummies.columns.values)]
         if len(to_drop):
             dummies = dummies.drop(columns=to_drop)
+            copied = True
 
-        if prefix is not None:
+        if prefix is None:
+            cats = dummies.columns
+        else:
             pref = prefix + (prefix_sep or "")
-            name_map = dict()
+            cats = []
             to_keep = []
             for c in dummies.columns:
                 if isinstance(c, str) and c.startswith(pref):
                     to_keep.append(c)
-                    name_map[c] = c[len(pref) :]
-            dummies = dummies[to_keep].rename(columns=name_map)
+                    cats.append(c[len(pref) :])
+            dummies = dummies[to_keep]
 
         df = dummies.astype("boolean")
+        if fillna is not None:
+            df = df.fillna(fillna, inplace=copied)
 
-        multicat_rows = df.sum(axis=1, skipna=False) > 1
+        row_totals = df.sum(axis=1, skipna=False)
+        if row_totals.isna().any():
+            raise ValueError("Unhandled NA values in dummy array")
+
+        multicat_rows = row_totals > 1
         if multicat_rows.any():
             raise ValueError(
                 "{} record(s) belongs to multiple categories: {}".format(
@@ -468,13 +484,12 @@ def from_dummies(
                 )
             )
 
-        mult_by = np.arange(df.shape[1]) + 1
-        #  000            000    0   -1
-        #  010            020    2    1
-        #  001 * 1,2,3 => 003 -> 3 -> 2 = correct codes
-        #  100            100    1    0
-        codes = ((df * mult_by).sum(axis=1, skipna=False) - 1).astype("Int64")
-        return cls.from_codes(codes.fillna(-1), df.columns.values, ordered=ordered)
+        codes = Series(np.full(len(row_totals), np.nan), index=df.index, dtype="Int64")
+        codes[row_totals == 0] = -1
+        row_idx, code = np.nonzero(df)
+        codes[row_idx] = code
+
+        return cls.from_codes(codes.fillna(-1), cats, ordered=ordered)
 
     def get_dummies(
         self,

From ff01048275e900a07e3e73fba4d3cd2ebfd1032b Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Tue, 22 Sep 2020 15:57:24 +0100
Subject: [PATCH 41/47] remove comment about use of _get_dummies_1d

---
 pandas/core/arrays/categorical.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index cd0ccc2cdce76..ba4c8a9552b44 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -577,8 +577,6 @@ def get_dummies(
         1  0.0  1.0  0.0
         2  0.0  0.0  1.0
         """
-        # Would be better to use pandas.core.reshape.reshape._get_dummies_1d
-        # but that's internal and fails lints
         from pandas import get_dummies
 
         return get_dummies(

From 604b8397b54baea3f8b1dec1be0eb4a397dfc1f8 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Tue, 22 Sep 2020 16:02:40 +0100
Subject: [PATCH 42/47] type-annotate get/from_dummies

---
 pandas/core/arrays/categorical.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index ba4c8a9552b44..6002dc83c780e 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -378,9 +378,9 @@ def from_dummies(
         cls,
         dummies: "DataFrame",
         ordered: Optional[bool] = None,
-        prefix=None,
-        prefix_sep="_",
-        fillna=None,
+        prefix: Optional[str] = None,
+        prefix_sep: str = "_",
+        fillna: Optional[bool] = None,
     ) -> "Categorical":
         """Create a `Categorical` using a ``DataFrame`` of dummy variables.
 
@@ -493,12 +493,12 @@ def from_dummies(
 
     def get_dummies(
         self,
-        prefix=None,
-        prefix_sep="_",
-        dummy_na=False,
-        sparse=False,
-        drop_first=False,
-        dtype=None,
+        prefix: Optional[str] = None,
+        prefix_sep: str = "_",
+        dummy_na: bool = False,
+        sparse: bool = False,
+        drop_first: bool = False,
+        dtype: Dtype = None,
     ) -> "DataFrame":
         """
         Convert into dummy/indicator variables.

From c71e8076f65fd092dc17edef9c28dedcab17dd26 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Tue, 22 Sep 2020 16:03:16 +0100
Subject: [PATCH 43/47] split overlong line

---
 pandas/core/arrays/categorical.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 6002dc83c780e..b22f48cdda78a 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -407,7 +407,8 @@ def from_dummies(
             If ``prefix`` is not ``None``, use as the separator
             between the prefix and the final name of the category.
         fillna : optional bool, default None
-            How to handle NA values. If ``True`` or ``False``, NA is filled with that value.
+            How to handle NA values.
+            If ``True`` or ``False``, NA is filled with that value.
             If ``None``, raise a ValueError if there are any NA values.
 
         Raises

From 6f9272a3722577da8df11e940bab8034b05dec86 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Tue, 22 Sep 2020 16:07:42 +0100
Subject: [PATCH 44/47] blacken

---
 pandas/core/arrays/categorical.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index b22f48cdda78a..676c10eed6f02 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -451,11 +451,9 @@ def from_dummies(
         """
         from pandas import Series
 
-        copied = False
         to_drop = dummies.columns[isna(dummies.columns.values)]
         if len(to_drop):
             dummies = dummies.drop(columns=to_drop)
-            copied = True
 
         if prefix is None:
             cats = dummies.columns
@@ -471,7 +469,7 @@ def from_dummies(
 
         df = dummies.astype("boolean")
         if fillna is not None:
-            df = df.fillna(fillna, inplace=copied)
+            df = df.fillna(fillna)
 
         row_totals = df.sum(axis=1, skipna=False)
         if row_totals.isna().any():
@@ -481,7 +479,8 @@ def from_dummies(
         if multicat_rows.any():
             raise ValueError(
                 "{} record(s) belongs to multiple categories: {}".format(
-                    multicat_rows.sum(), list(df.index[multicat_rows]),
+                    multicat_rows.sum(),
+                    list(df.index[multicat_rows]),
                 )
             )
 

From 8fd4b721f3ed5f5c96d4dc07327ca5a26fd93340 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Tue, 22 Sep 2020 16:16:31 +0100
Subject: [PATCH 45/47] use f-strings

---
 pandas/core/arrays/categorical.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 676c10eed6f02..e65bd6e8d9f92 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -478,10 +478,8 @@ def from_dummies(
         multicat_rows = row_totals > 1
         if multicat_rows.any():
             raise ValueError(
-                "{} record(s) belongs to multiple categories: {}".format(
-                    multicat_rows.sum(),
-                    list(df.index[multicat_rows]),
-                )
+                f"{multicat_rows.sum()} record(s) belongs to multiple categories: "
+                f"{list(df.index[multicat_rows])}"
             )
 
         codes = Series(np.full(len(row_totals), np.nan), index=df.index, dtype="Int64")

From 534bc332ebfbf42047609b98c3faaaee59508bca Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Tue, 22 Sep 2020 16:18:01 +0100
Subject: [PATCH 46/47] add some typing

---
 pandas/core/arrays/categorical.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index e65bd6e8d9f92..d018968274b75 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2,7 +2,7 @@
 from functools import partial
 import operator
 from shutil import get_terminal_size
-from typing import TYPE_CHECKING, Dict, Hashable, List, Optional, Type, Union, cast
+from typing import TYPE_CHECKING, Any, Dict, Hashable, List, Optional, Type, Union, cast
 from warnings import warn
 
 import numpy as np
@@ -455,12 +455,13 @@ def from_dummies(
         if len(to_drop):
             dummies = dummies.drop(columns=to_drop)
 
+        cats: List[Any]
         if prefix is None:
-            cats = dummies.columns
+            cats = list(dummies.columns)
         else:
             pref = prefix + (prefix_sep or "")
             cats = []
-            to_keep = []
+            to_keep: List[str] = []
             for c in dummies.columns:
                 if isinstance(c, str) and c.startswith(pref):
                     to_keep.append(c)

From 0facec6e6b01a89ff8573ad25b1575baeb4f9225 Mon Sep 17 00:00:00 2001
From: Chris Barnes <barnesc@janelia.hhmi.org>
Date: Tue, 22 Sep 2020 16:22:00 +0100
Subject: [PATCH 47/47] remove unnecessary .values

---
 pandas/core/arrays/categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index d018968274b75..224e336fae9dd 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -451,7 +451,7 @@ def from_dummies(
         """
         from pandas import Series
 
-        to_drop = dummies.columns[isna(dummies.columns.values)]
+        to_drop = dummies.columns[isna(dummies.columns)]
         if len(to_drop):
             dummies = dummies.drop(columns=to_drop)