diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index dfa216b1db56e3..00bd4862f9acdf 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -156,7 +156,7 @@ ExtensionArray Other ^^^^^ - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when replacing timezone-aware timestamps using a dict-like replacer (:issue:`27720`) -- +- The returned dtype of :func:`pd.unique` now matches the input dtype. (:issue:`27874`) - - diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 21d12d02c90080..457cb87b1e6978 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -404,7 +404,7 @@ def unique(values): table = htable(len(values)) uniques = table.unique(values) - uniques = _reconstruct_data(uniques, dtype, original) + uniques = _reconstruct_data(uniques, original.dtype, original) return uniques diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index c760c75e44f6bd..483122a0eeabaf 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -159,8 +159,8 @@ def test_memory_usage(self): class Ops: def _allow_na_ops(self, obj): """Whether to skip test cases including NaN""" - if isinstance(obj, Index) and (obj.is_boolean() or not obj._can_hold_na): - # don't test boolean / int64 index + if (isinstance(obj, Index) and obj.is_boolean()) or not obj._can_hold_na: + # don't test boolean / integer dtypes return False return True @@ -187,7 +187,24 @@ def setup_method(self, method): types = ["bool", "int", "float", "dt", "dt_tz", "period", "string", "unicode"] self.indexes = [getattr(self, "{}_index".format(t)) for t in types] self.series = [getattr(self, "{}_series".format(t)) for t in types] - self.objs = self.indexes + self.series + + # To test narrow dtypes, we use narrower *data* elements, not *index* elements + index = self.int_index + self.float32_series = Series(arr.astype(np.float32), index=index, name="a") + + arr_int = np.random.choice(10, 
size=10, replace=False) + self.int8_series = Series(arr_int.astype(np.int8), index=index, name="a") + self.int16_series = Series(arr_int.astype(np.int16), index=index, name="a") + self.int32_series = Series(arr_int.astype(np.int32), index=index, name="a") + + self.uint8_series = Series(arr_int.astype(np.uint8), index=index, name="a") + self.uint16_series = Series(arr_int.astype(np.uint16), index=index, name="a") + self.uint32_series = Series(arr_int.astype(np.uint32), index=index, name="a") + + nrw_types = ["float32", "int8", "int16", "int32", "uint8", "uint16", "uint32"] + self.narrow_series = [getattr(self, "{}_series".format(t)) for t in nrw_types] + + self.objs = self.indexes + self.series + self.narrow_series def check_ops_properties(self, props, filter=None, ignore_failures=False): for op in props: @@ -385,6 +402,7 @@ def test_value_counts_unique_nunique(self): if isinstance(o, Index): assert isinstance(result, o.__class__) tm.assert_index_equal(result, orig) + assert result.dtype == orig.dtype elif is_datetime64tz_dtype(o): # datetimetz Series returns array of Timestamp assert result[0] == orig[0] @@ -396,6 +414,7 @@ def test_value_counts_unique_nunique(self): ) else: tm.assert_numpy_array_equal(result, orig.values) + assert result.dtype == orig.dtype assert o.nunique() == len(np.unique(o.values)) @@ -904,7 +923,7 @@ def test_fillna(self): expected = [fill_value] * 2 + list(values[2:]) - expected = klass(expected) + expected = klass(expected, dtype=orig.dtype) o = klass(values) # check values has the same dtype as the original