Skip to content

Commit

Permalink
API: Have MultiIndex constructors return MI
Browse files Browse the repository at this point in the history
This removes the special case for MultiIndex constructors returning
an Index if all the levels are length-1. Now this will return a
MultiIndex with a single level.

This is a backwards-incompatible change, with no clear method for
deprecation, so we're making a clean break.

Closes #17178
  • Loading branch information
TomAugspurger committed Aug 24, 2017
1 parent 66ec5f3 commit 7ccce35
Show file tree
Hide file tree
Showing 12 changed files with 127 additions and 45 deletions.
21 changes: 21 additions & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,27 @@ named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical

The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement.

.. _whatsnew_0210.api.multiindex_single:

MultiIndex Constructor with a Single Level
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Previous versions would automatically squeeze a ``MultiIndex`` that had only a
single level (a length-one ``levels``) down to an ``Index``:

.. code-block:: ipython

In [2]: pd.MultiIndex.from_tuples([('a',), ('b',)])
Out[2]: Index(['a', 'b'], dtype='object')

A length-one ``levels`` is no longer special-cased. It behaves exactly as if
you had two or more levels, so a :class:`MultiIndex` is always returned from
all of the ``MultiIndex`` constructors:

.. ipython:: python

pd.MultiIndex.from_tuples([('a',), ('b',)])

.. _whatsnew_0210.api:

Other API Changes
Expand Down
11 changes: 6 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@
_dict_compat,
standardize_mapping)
from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.index import (Index, MultiIndex, _ensure_index,
_index_from_sequences)
from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
check_bool_indexer)
from pandas.core.internals import (BlockManager,
Expand Down Expand Up @@ -1155,9 +1156,9 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
else:
try:
to_remove = [arr_columns.get_loc(field) for field in index]

result_index = MultiIndex.from_arrays(
[arrays[i] for i in to_remove], names=index)
index_data = [arrays[i] for i in to_remove]
result_index = _index_from_sequences(index_data,
names=index)

exclude.update(index)
except Exception:
Expand Down Expand Up @@ -3000,7 +3001,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
to_remove.append(col)
arrays.append(level)

index = MultiIndex.from_arrays(arrays, names=names)
index = _index_from_sequences(arrays, names)

if verify_integrity and not index.is_unique:
duplicates = index.get_duplicates()
Expand Down
12 changes: 8 additions & 4 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from pandas.core.indexes.base import (Index, _new_Index, # noqa
_ensure_index, _get_na_value,
InvalidIndexError)
from pandas.core.indexes.base import (Index,
_new_Index,
_ensure_index,
_index_from_sequences,
_get_na_value,
InvalidIndexError) # noqa
from pandas.core.indexes.category import CategoricalIndex # noqa
from pandas.core.indexes.multi import MultiIndex # noqa
from pandas.core.indexes.interval import IntervalIndex # noqa
Expand All @@ -22,7 +25,8 @@
'InvalidIndexError', 'TimedeltaIndex',
'PeriodIndex', 'DatetimeIndex',
'_new_Index', 'NaT',
'_ensure_index', '_get_na_value', '_get_combined_index',
'_ensure_index', '_index_from_sequences', '_get_na_value',
'_get_combined_index',
'_get_objs_combined_axis',
'_get_distinct_indexes', '_union_indexes',
'_get_consensus_names',
Expand Down
27 changes: 27 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4007,6 +4007,33 @@ def invalid_op(self, other=None):
Index._add_comparison_methods()


def _index_from_sequences(sequences, names=None):
    """Build an Index or a MultiIndex from a collection of sequences.

    Exactly one sequence produces a flat ``Index``; any other number of
    sequences is handed to ``MultiIndex.from_arrays``.

    Parameters
    ----------
    sequences : sequence of sequences
        The data for the index, one inner sequence per level.
    names : sequence, optional
        Names for the result. When a single sequence is given only
        ``names[0]`` is used, as the name of the returned ``Index``.

    Returns
    -------
    Index or MultiIndex

    Examples
    --------
    >>> _index_from_sequences([[1, 2, 3]], names=['name'])
    Int64Index([1, 2, 3], dtype='int64', name='name')

    >>> _index_from_sequences([['a', 'a'], ['a', 'b']], names=['L1', 'L2'])
    MultiIndex(levels=[['a'], ['a', 'b']],
               labels=[[0, 0], [0, 1]],
               names=['L1', 'L2'])
    """
    # Function-scope import -- presumably to avoid a circular import between
    # the base and multi modules (TODO confirm).
    from .multi import MultiIndex

    if len(sequences) != 1:
        return MultiIndex.from_arrays(sequences, names=names)

    # Single sequence: unwrap both the data and (if given) its name.
    name = None if names is None else names[0]
    return Index(sequences[0], name=name)


def _ensure_index(index_like, copy=False):
if isinstance(index_like, Index):
if copy:
Expand Down
10 changes: 0 additions & 10 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
raise ValueError('Length of levels and labels must be the same.')
if len(levels) == 0:
raise ValueError('Must pass non-zero number of levels/labels')
if len(levels) == 1:
if names:
name = names[0]
else:
name = None
return Index(levels[0], name=name, copy=True).take(labels[0])

result = object.__new__(MultiIndex)

Expand Down Expand Up @@ -1084,10 +1078,6 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
MultiIndex.from_product : Make a MultiIndex from cartesian product
of iterables
"""
if len(arrays) == 1:
name = None if names is None else names[0]
return Index(arrays[0], name=name)

# Check if lengths of all arrays are equal or not,
# raise ValueError, if not
for i in range(1, len(arrays)):
Expand Down
18 changes: 12 additions & 6 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

from pandas.core.frame import _shared_docs
from pandas.util._decorators import Appender
from pandas.core.index import MultiIndex, _get_na_value
from pandas.core.index import Index, MultiIndex, _get_na_value


class _Unstacker(object):
Expand Down Expand Up @@ -311,10 +311,13 @@ def _unstack_multiple(data, clocs):
recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels,
xnull=False)

dummy_index = MultiIndex(levels=rlevels + [obs_ids],
labels=rlabels + [comp_ids],
names=rnames + ['__placeholder__'],
verify_integrity=False)
if rlocs == []:
dummy_index = Index(obs_ids, name='__placeholder__')
else:
dummy_index = MultiIndex(levels=rlevels + [obs_ids],
labels=rlabels + [comp_ids],
names=rnames + ['__placeholder__'],
verify_integrity=False)

if isinstance(data, Series):
dummy = data.copy()
Expand Down Expand Up @@ -446,7 +449,10 @@ def _slow_pivot(index, columns, values):

def unstack(obj, level, fill_value=None):
if isinstance(level, (tuple, list)):
return _unstack_multiple(obj, level)
if len(level) == 1:
level = level[0]
else:
return _unstack_multiple(obj, level)

if isinstance(obj, DataFrame):
if isinstance(obj.index, MultiIndex):
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/sparse/scipy_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,11 @@ def robust_get_level_values(i):
labels_to_i = Series(labels_to_i)
if len(subset) > 1:
labels_to_i.index = MultiIndex.from_tuples(labels_to_i.index)
labels_to_i.index.names = [index.names[i] for i in subset]
labels_to_i.index.names = [index.names[i] for i in subset]
else:
labels_to_i.index = Index(x[0] for x in labels_to_i.index)
labels_to_i.index.name = index.names[subset[0]]

labels_to_i.name = 'value'
return (labels_to_i)

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1452,7 +1452,10 @@ def cons_row(x):

if expand:
result = list(result)
return MultiIndex.from_tuples(result, names=name)
out = MultiIndex.from_tuples(result, names=name)
if out.nlevels == 1:
out = out.get_level_values(0)
return out
else:
return Index(result, name=name)
else:
Expand Down
21 changes: 15 additions & 6 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
is_scalar, is_categorical_dtype)
from pandas.core.dtypes.missing import isna
from pandas.core.dtypes.cast import astype_nansafe
from pandas.core.index import Index, MultiIndex, RangeIndex
from pandas.core.index import (Index, MultiIndex, RangeIndex,
_index_from_sequences)
from pandas.core.series import Series
from pandas.core.frame import DataFrame
from pandas.core.categorical import Categorical
Expand Down Expand Up @@ -1444,7 +1445,16 @@ def _agg_index(self, index, try_parse_dates=True):
arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues)
arrays.append(arr)

index = MultiIndex.from_arrays(arrays, names=self.index_names)
names = self.index_names
index = _index_from_sequences(arrays, names)
if len(arrays) > 1:
index = MultiIndex.from_arrays(arrays, names=self.index_names)
else:
if self.index_names is None:
name = None
else:
name = self.index_names[0]
index = Index(arrays[0], name=name)

return index

Expand Down Expand Up @@ -1808,7 +1818,7 @@ def read(self, nrows=None):
try_parse_dates=True)
arrays.append(values)

index = MultiIndex.from_arrays(arrays)
index = _index_from_sequences(arrays)

if self.usecols is not None:
names = self._filter_usecols(names)
Expand Down Expand Up @@ -3138,9 +3148,8 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None):
if index_col is None or index_col is False:
index = Index([])
else:
index = [Series([], dtype=dtype[index_name])
for index_name in index_names]
index = MultiIndex.from_arrays(index, names=index_names)
data = [Series([], dtype=dtype[name]) for name in index_names]
index = _index_from_sequences(data, names=index_names)
index_col.sort()
for i, n in enumerate(index_col):
columns.pop(n - i)
Expand Down
18 changes: 17 additions & 1 deletion pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
DataFrame, Float64Index, Int64Index,
CategoricalIndex, DatetimeIndex, TimedeltaIndex,
PeriodIndex, isna)
from pandas.core.index import _get_combined_index
from pandas.core.index import _get_combined_index, _index_from_sequences
from pandas.util.testing import assert_almost_equal
from pandas.compat.numpy import np_datetime64_compat

Expand Down Expand Up @@ -2103,3 +2103,19 @@ def test_intersect_str_dates(self):
res = i2.intersection(i1)

assert len(res) == 0


class TestIndexUtils(object):
    """Tests for the module-level index construction helpers."""

    @pytest.mark.parametrize('data, names, expected', [
        # one sequence, no names -> plain Index
        ([[1, 2, 3]], None, Index([1, 2, 3])),
        # one sequence, one name -> named Index
        ([[1, 2, 3]], ['name'], Index([1, 2, 3], name='name')),
        # two sequences, no names -> MultiIndex
        (
            [['a', 'a'], ['c', 'd']],
            None,
            MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]]),
        ),
        # two sequences, two names -> named MultiIndex
        (
            [['a', 'a'], ['c', 'd']],
            ['L1', 'L2'],
            MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]],
                       names=['L1', 'L2']),
        ),
    ])
    def test_index_from_sequences(self, data, names, expected):
        """``_index_from_sequences`` builds the expected index per case."""
        tm.assert_index_equal(_index_from_sequences(data, names), expected)
20 changes: 9 additions & 11 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,15 +537,12 @@ def test_astype(self):
self.index.astype(np.dtype(int))

def test_constructor_single_level(self):
single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
labels=[[0, 1, 2, 3]], names=['first'])
assert isinstance(single_level, Index)
assert not isinstance(single_level, MultiIndex)
assert single_level.name == 'first'

single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
labels=[[0, 1, 2, 3]])
assert single_level.name is None
result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
labels=[[0, 1, 2, 3]], names=['first'])
assert isinstance(result, MultiIndex)
expected = Index(['foo', 'bar', 'baz', 'qux'], name='first')
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ['first']

def test_constructor_no_levels(self):
tm.assert_raises_regex(ValueError, "non-zero number "
Expand Down Expand Up @@ -768,8 +765,9 @@ def test_from_arrays_empty(self):

# 1 level
result = MultiIndex.from_arrays(arrays=[[]], names=['A'])
assert isinstance(result, MultiIndex)
expected = Index([], name='A')
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result.levels[0], expected)

# N levels
for N in [2, 3]:
Expand Down Expand Up @@ -830,7 +828,7 @@ def test_from_product_empty(self):
# 1 level
result = MultiIndex.from_product([[]], names=['A'])
expected = pd.Index([], name='A')
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result.levels[0], expected)

# 2 levels
l1 = [[], ['foo', 'bar', 'baz'], []]
Expand Down
3 changes: 3 additions & 0 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1910,6 +1910,9 @@ def keyfunc(x):
# convert tuples to index
if nentries == 1:
index = Index(tuples[0], name=names[0])
elif nlevels == 1:
name = None if names is None else names[0]
index = Index((x[0] for x in tuples), name=name)
else:
index = MultiIndex.from_tuples(tuples, names=names)
return index
Expand Down

0 comments on commit 7ccce35

Please sign in to comment.