From 75ef96fb79eb72c89583e59f290c6f934dca5e5c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 18 Dec 2019 14:54:26 -0800 Subject: [PATCH 1/6] BUG: raise on non-hashable Index name, closes #29069 --- pandas/core/indexes/base.py | 17 +++++++++++++++-- pandas/core/indexes/category.py | 5 ++--- pandas/core/indexes/datetimes.py | 5 ++--- pandas/core/indexes/interval.py | 4 ++-- pandas/core/indexes/numeric.py | 10 +++++++--- pandas/core/indexes/period.py | 9 ++++++--- pandas/core/indexes/range.py | 4 ++-- pandas/core/indexes/timedeltas.py | 3 ++- pandas/tests/indexes/common.py | 7 +++++++ pandas/tests/indexes/test_base.py | 1 + 10 files changed, 46 insertions(+), 19 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5abd049b9564c..f8068fb2a864c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -274,8 +274,7 @@ def __new__( from .interval import IntervalIndex from .category import CategoricalIndex - if name is None and hasattr(data, "name"): - name = data.name + name = maybe_extract_name(name, data) if isinstance(data, ABCPandasArray): # ensure users don't accidentally put a PandasArray in an index. @@ -5464,3 +5463,17 @@ def default_index(n): from pandas.core.indexes.range import RangeIndex return RangeIndex(0, n, name=None) + + +def maybe_extract_name(name, obj): + """ + If no name is passed, then extract it from data, validating hashability. + """ + if name is None and hasattr(obj, "name"): + name = obj.name + + # GH#29069 + if not is_hashable(name): + raise TypeError(f"Index.name must be a hashable type") + + return name diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 44478d00da9cf..aedf6fa31fdae 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -29,7 +29,7 @@ from pandas.core.base import _shared_docs import pandas.core.common as com import pandas.core.indexes.base as ibase -from pandas.core.indexes.base import Index, _index_shared_docs +from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name import pandas.core.missing as missing from pandas.core.ops import get_op_result_name @@ -175,8 +175,7 @@ def __new__( dtype = CategoricalDtype._from_values_or_dtype(data, categories, ordered, dtype) - if name is None and hasattr(data, "name"): - name = data.name + name = maybe_extract_name(name, data) if not is_categorical_dtype(data): # don't allow scalars diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 523c434cb7377..6c68432fddb92 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -30,7 +30,7 @@ ) from pandas.core.base import _shared_docs import pandas.core.common as com -from pandas.core.indexes.base import Index +from pandas.core.indexes.base import Index, maybe_extract_name from pandas.core.indexes.datetimelike import ( DatetimeIndexOpsMixin, DatetimelikeDelegateMixin, @@ -253,8 +253,7 @@ def __new__( # - Cases checked above all return/raise before reaching here - # - if name is None and hasattr(data, "name"): - name = data.name + name = maybe_extract_name(name, data) dtarr = DatetimeArray._from_sequence( data, diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index f046f0d89c428..8c6eb64ae9943 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -47,6 +47,7 @@ _index_shared_docs, default_pprint, ensure_index, + maybe_extract_name, ) from pandas.core.indexes.datetimes import DatetimeIndex, date_range from pandas.core.indexes.multi import MultiIndex @@ -213,8 +214,7 @@ def __new__( cls, data, closed=None, dtype=None, copy=False, name=None, verify_integrity=True ): - if name is None and hasattr(data, "name"): - name = data.name + name = maybe_extract_name(name, data) with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray( diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 048bff46759bc..ee04ee3918c7a 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -30,7 +30,12 @@ from pandas._typing import Dtype from pandas.core import algorithms import pandas.core.common as com -from pandas.core.indexes.base import Index, InvalidIndexError, _index_shared_docs +from pandas.core.indexes.base import ( + Index, + InvalidIndexError, + _index_shared_docs, + maybe_extract_name, +) from pandas.core.ops import get_op_result_name _num_index_shared_docs = dict() @@ -68,8 +73,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None): else: subarr = data - if name is None and hasattr(data, "name"): - name = data.name + name = maybe_extract_name(name, data) return cls._simple_new(subarr, name=name) @classmethod diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 9485116a8084a..6ef55d79e20c5 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -25,7 +25,11 @@ from pandas.core.base import _shared_docs import pandas.core.common as com import pandas.core.indexes.base as ibase -from pandas.core.indexes.base import _index_shared_docs, ensure_index +from pandas.core.indexes.base import ( + _index_shared_docs, + ensure_index, + maybe_extract_name, +) from pandas.core.indexes.datetimelike import ( DatetimeIndexOpsMixin, DatetimelikeDelegateMixin, @@ -184,8 +188,7 @@ def __new__( argument = list(set(fields) - valid_field_set)[0] raise TypeError(f"__new__() got an unexpected keyword argument {argument}") - if name is None and hasattr(data, "name"): - name = data.name + name = maybe_extract_name(name, data) if data is None and ordinal is None: # range-based. diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 6ad70841a48b0..68cb03891bafe 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -26,7 +26,7 @@ import pandas.core.common as com from pandas.core.construction import extract_array import pandas.core.indexes.base as ibase -from pandas.core.indexes.base import Index, _index_shared_docs +from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name from pandas.core.indexes.numeric import Int64Index from pandas.core.ops.common import unpack_zerodim_and_defer @@ -85,10 +85,10 @@ def __new__( ): cls._validate_dtype(dtype) + name = maybe_extract_name(name, start) # RangeIndex if isinstance(start, RangeIndex): - name = start.name if name is None else name start = start._range return cls._simple_new(start, dtype=dtype, name=name) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 889075ebe4e31..f4f2cb31cccde 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -25,7 +25,7 @@ from pandas.core.arrays.timedeltas import TimedeltaArray, _is_convertible_to_td from pandas.core.base import _shared_docs import pandas.core.common as com -from pandas.core.indexes.base import Index, _index_shared_docs +from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name from pandas.core.indexes.datetimelike import ( DatetimeIndexOpsMixin, DatetimelikeDelegateMixin, @@ -163,6 +163,7 @@ def __new__( copy=False, name=None, ): + name = maybe_extract_name(name, data) if is_scalar(data): raise TypeError( diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 102949fe3f05e..68cca473d6bb0 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -103,6 +103,13 @@ def test_shift(self): with pytest.raises(NotImplementedError, match=msg): idx.shift(1, 2) + def test_constructor_name_unhashable(self): + # GH#29069 check that name is hashable + # See also same-named test in tests.series.test_constructors + idx = self.create_index() + with pytest.raises(TypeError, match="Index.name must be a hashable type"): + type(idx)(idx, name=[]) + def test_create_index_existing_name(self): # GH11193, when an existing index is passed, and a new name is not diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d4d644e486478..6ec35a32d74ce 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -100,6 +100,7 @@ def test_constructor_copy(self, index): arr[0] = "SOMEBIGLONGSTRING" assert new_index[0] != "SOMEBIGLONGSTRING" + # FIXME: dont leave commented-out # what to do here? # arr = np.array(5.) # pytest.raises(Exception, arr.view, Index) From bac89e567e33ce2179e9348b697397410a9a9ccb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 18 Dec 2019 14:55:14 -0800 Subject: [PATCH 2/6] no fstring --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f8068fb2a864c..2438c58b5b094 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5474,6 +5474,6 @@ def maybe_extract_name(name, obj): # GH#29069 if not is_hashable(name): - raise TypeError(f"Index.name must be a hashable type") + raise TypeError("Index.name must be a hashable type") return name From 251a1a86755557eae167cd4cddde2935f784a5a4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 10:29:18 -0800 Subject: [PATCH 3/6] make name a property with setter --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/indexes/base.py | 27 +++++++++++++++++++-------- pandas/core/series.py | 6 ++---- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a15d5b319fc82..eab75049ea233 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -890,7 +890,7 @@ Other - Fixed :class:`IntegerArray` returning ``inf`` rather than ``NaN`` for operations dividing by 0 (:issue:`27398`) - Fixed ``pow`` operations for :class:`IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`) - Bug in :meth:`Series.count` raises if use_inf_as_na is enabled (:issue:`29478`) - +- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:29069`) .. _whatsnew_1000.contributors: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2438c58b5b094..abc93521e0474 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -239,7 +239,7 @@ def _outer_indexer(self, left, right): _typ = "index" _data: Union[ExtensionArray, np.ndarray] _id = None - name = None + _name = None _comparables = ["name"] _attributes = ["name"] _is_numeric_dtype = False @@ -519,7 +519,7 @@ def _simple_new(cls, values, name=None, dtype=None): # data buffers and strides. We don't re-use `_ndarray_values`, since # we actually set this value too. result._index_data = values - result.name = name + result._name = name return result._reset_identity() @@ -1209,6 +1209,15 @@ def to_frame(self, index=True, name=None): # -------------------------------------------------------------------- # Name-Centric Methods + @property + def name(self): + return self._name + + @name.setter + def name(self, value): + maybe_extract_name(value, None) + self._name = value + def _validate_names(self, name=None, names=None, deep=False): """ Handles the quirks of having a singular 'name' parameter for general @@ -1258,7 +1267,7 @@ def _set_names(self, values, level=None): for name in values: if not is_hashable(name): raise TypeError(f"{type(self).__name__}.name must be a hashable type") - self.name = values[0] + self._name = values[0] names = property(fset=_set_names, fget=_get_names) @@ -1547,7 +1556,7 @@ def droplevel(self, level=0): if mask.any(): result = result.putmask(mask, np.nan) - result.name = new_names[0] + result._name = new_names[0] return result else: from .multi import MultiIndex @@ -1778,7 +1787,7 @@ def __setstate__(self, state): nd_state, own_state = state data = np.empty(nd_state[1], dtype=nd_state[2]) np.ndarray.__setstate__(data, nd_state) - self.name = own_state[0] + self._name = own_state[0] else: # pragma: no cover data = np.empty(state) @@ -5465,15 +5474,17 @@ def default_index(n): return RangeIndex(0, n, name=None) -def maybe_extract_name(name, obj): +def maybe_extract_name(name, obj, cls_name="Index"): """ If no name is passed, then extract it from data, validating hashability. """ - if name is None and hasattr(obj, "name"): + if name is None and isinstance(obj, (Index, ABCSeries)): + # Note we don't just check for "name" attribute since that would + # pick up e.g. dtype.name name = obj.name # GH#29069 if not is_hashable(name): - raise TypeError("Index.name must be a hashable type") + raise TypeError(f"{cls_name}.name must be a hashable type") return name diff --git a/pandas/core/series.py b/pandas/core/series.py index 54c163330e6ee..eff8d4e1eff6b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -194,6 +194,8 @@ def __init__( else: + name = ibase.maybe_extract_name(name, data, "Series") + if is_empty_data(data) and dtype is None: # gh-17261 warnings.warn( @@ -219,8 +221,6 @@ def __init__( "initializing a Series from a MultiIndex is not supported" ) elif isinstance(data, Index): - if name is None: - name = data.name if dtype is not None: # astype copies @@ -237,8 +237,6 @@ def __init__( elif isinstance(data, np.ndarray): pass elif isinstance(data, ABCSeries): - if name is None: - name = data.name if index is None: index = data.index else: From c72fd0b0732cbfb75b53a45d5a64b7688efefcfc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 13:48:19 -0800 Subject: [PATCH 4/6] mypy fixup --- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/datetimelike.py | 1 - pandas/core/indexes/timedeltas.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index abc93521e0474..0fb0c49f0d8a3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1,7 +1,7 @@ from datetime import datetime import operator from textwrap import dedent -from typing import FrozenSet, Union +from typing import FrozenSet, Hashable, Optional, Union import warnings import numpy as np @@ -239,7 +239,7 @@ def _outer_indexer(self, left, right): _typ = "index" _data: Union[ExtensionArray, np.ndarray] _id = None - _name = None + _name: Optional[Hashable] = None _comparables = ["name"] _attributes = ["name"] _is_numeric_dtype = False diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 50dbddec5c8b2..d3db91b65dea4 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -813,7 +813,6 @@ class DatetimelikeDelegateMixin(PandasDelegate): _raw_methods: Set[str] = set() # raw_properties : dispatch properties that shouldn't be boxed in an Index _raw_properties: Set[str] = set() - name = None _data: ExtensionArray @property diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index f4f2cb31cccde..cebac99c46a27 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -211,7 +211,7 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): tdarr = TimedeltaArray._simple_new(values._data, freq=freq) result = object.__new__(cls) result._data = tdarr - result.name = name + result._name = name # For groupby perf. See note in indexes/base about _index_data result._index_data = tdarr._data From 695ee033dfc93bdf66b7293a68de48137250e658 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 24 Dec 2019 09:35:40 -0800 Subject: [PATCH 5/6] Address comments --- pandas/core/indexes/base.py | 8 ++++---- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/numeric.py | 2 +- pandas/core/indexes/period.py | 2 +- pandas/core/indexes/range.py | 2 +- pandas/core/indexes/timedeltas.py | 2 +- pandas/core/series.py | 2 +- 9 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 87466985d1efd..ffd3c70efe744 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -274,7 +274,7 @@ def __new__( from .interval import IntervalIndex from .category import CategoricalIndex - name = maybe_extract_name(name, data) + name = maybe_extract_name(name, data, cls) if isinstance(data, ABCPandasArray): # ensure users don't accidentally put a PandasArray in an index. @@ -1214,7 +1214,7 @@ def name(self): @name.setter def name(self, value): - maybe_extract_name(value, None) + maybe_extract_name(value, None, type(self)) self._name = value def _validate_names(self, name=None, names=None, deep=False): @@ -5472,7 +5472,7 @@ def default_index(n): return RangeIndex(0, n, name=None) -def maybe_extract_name(name, obj, cls_name="Index"): +def maybe_extract_name(name, obj, cls) -> Optional[Hashable]: """ If no name is passed, then extract it from data, validating hashability. """ @@ -5483,6 +5483,6 @@ def maybe_extract_name(name, obj, cls_name="Index"): # GH#29069 if not is_hashable(name): - raise TypeError(f"{cls_name}.name must be a hashable type") + raise TypeError(f"{cls.__name__}.name must be a hashable type") return name diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 2840fb1456d63..ba476f9e25ee6 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -175,7 +175,7 @@ def __new__( dtype = CategoricalDtype._from_values_or_dtype(data, categories, ordered, dtype) - name = maybe_extract_name(name, data) + name = maybe_extract_name(name, data, cls) if not is_categorical_dtype(data): # don't allow scalars diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index fa25151f1b5e6..d226cf7d940f1 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -254,7 +254,7 @@ def __new__( # - Cases checked above all return/raise before reaching here - # - name = maybe_extract_name(name, data) + name = maybe_extract_name(name, data, cls) dtarr = DatetimeArray._from_sequence( data, diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 0547af3949656..ea2fe7cfdb071 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -218,7 +218,7 @@ def __new__( verify_integrity: bool = True, ): - name = maybe_extract_name(name, data) + name = maybe_extract_name(name, data, cls, cls) with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray( diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 8c347225c2eae..39cbe5f151262 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -73,7 +73,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None): else: subarr = data - name = maybe_extract_name(name, data) + name = maybe_extract_name(name, data, cls) return cls._simple_new(subarr, name=name) @classmethod diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 6ef55d79e20c5..6465a0c1724af 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -188,7 +188,7 @@ def __new__( argument = list(set(fields) - valid_field_set)[0] raise TypeError(f"__new__() got an unexpected keyword argument {argument}") - name = maybe_extract_name(name, data) + name = maybe_extract_name(name, data, cls) if data is None and ordinal is None: # range-based. diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 68cb03891bafe..225cd43bbd588 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -85,7 +85,7 @@ def __new__( ): cls._validate_dtype(dtype) - name = maybe_extract_name(name, start) + name = maybe_extract_name(name, start, cls) # RangeIndex if isinstance(start, RangeIndex): diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index cebac99c46a27..2569a510fdffb 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -163,7 +163,7 @@ def __new__( copy=False, name=None, ): - name = maybe_extract_name(name, data) + name = maybe_extract_name(name, data, cls) if is_scalar(data): raise TypeError( diff --git a/pandas/core/series.py b/pandas/core/series.py index eff8d4e1eff6b..a8aab5f216458 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -194,7 +194,7 @@ def __init__( else: - name = ibase.maybe_extract_name(name, data, "Series") + name = ibase.maybe_extract_name(name, data, type(self)) if is_empty_data(data) and dtype is None: # gh-17261 From 76407cf51e90dba7459a3ba9343c49a3f480d49c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 24 Dec 2019 09:58:36 -0800 Subject: [PATCH 6/6] typo fixup --- pandas/core/indexes/interval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index ea2fe7cfdb071..221ad564a56f4 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -218,7 +218,7 @@ def __new__( verify_integrity: bool = True, ): - name = maybe_extract_name(name, data, cls, cls) + name = maybe_extract_name(name, data, cls) with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray(