Skip to content

Commit

Permalink
API: Have MultiIndex constructors return MI
Browse files Browse the repository at this point in the history
This removes the special case for MultiIndex constructors returning
an Index if all the levels are length-1. Now this will return a
MultiIndex with a single level.

This is a backwards incompatible change, with no clear method for
deprecation, so we're making a clean break.

Closes #17178
  • Loading branch information
TomAugspurger committed Aug 23, 2017
1 parent 2bec750 commit be17791
Show file tree
Hide file tree
Showing 9 changed files with 82 additions and 37 deletions.
21 changes: 21 additions & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,27 @@ named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical

The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement.

.. _whatsnew_0210.api.multiindex_single:

MultiIndex Constructor with a Single Level
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Previous versions would automatically squeeze a ``MultiIndex`` that had only a
single level (``levels`` of length one) down to an ``Index``:

.. code-block:: ipython

In [2]: pd.MultiIndex.from_tuples([('a',), ('b',)])
Out[2]: Index(['a', 'b'], dtype='object')

A single level is no longer special-cased. It behaves exactly as if you had
two or more levels, so a :class:`MultiIndex` is always returned from all of the
``MultiIndex`` constructors:

.. ipython:: python

pd.MultiIndex.from_tuples([('a',), ('b',)])

.. _whatsnew_0210.api:

Other API Changes
Expand Down
12 changes: 9 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1156,8 +1156,11 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
try:
to_remove = [arr_columns.get_loc(field) for field in index]

result_index = MultiIndex.from_arrays(
[arrays[i] for i in to_remove], names=index)
if len(to_remove) == 1:
result_index = Index(arrays[to_remove[0]], name=index[0])
else:
result_index = MultiIndex.from_arrays(
[arrays[i] for i in to_remove], names=index)

exclude.update(index)
except Exception:
Expand Down Expand Up @@ -3000,7 +3003,10 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
to_remove.append(col)
arrays.append(level)

index = MultiIndex.from_arrays(arrays, names=names)
if len(arrays) == 1:
index = Index(arrays[0], name=names[0])
else:
index = MultiIndex.from_arrays(arrays, names=names)

if verify_integrity and not index.is_unique:
duplicates = index.get_duplicates()
Expand Down
10 changes: 0 additions & 10 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
raise ValueError('Length of levels and labels must be the same.')
if len(levels) == 0:
raise ValueError('Must pass non-zero number of levels/labels')
if len(levels) == 1:
if names:
name = names[0]
else:
name = None
return Index(levels[0], name=name, copy=True).take(labels[0])

result = object.__new__(MultiIndex)

Expand Down Expand Up @@ -1084,10 +1078,6 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
MultiIndex.from_product : Make a MultiIndex from cartesian product
of iterables
"""
if len(arrays) == 1:
name = None if names is None else names[0]
return Index(arrays[0], name=name)

# Check if lengths of all arrays are equal or not,
# raise ValueError, if not
for i in range(1, len(arrays)):
Expand Down
18 changes: 12 additions & 6 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

from pandas.core.frame import _shared_docs
from pandas.util._decorators import Appender
from pandas.core.index import MultiIndex, _get_na_value
from pandas.core.index import Index, MultiIndex, _get_na_value


class _Unstacker(object):
Expand Down Expand Up @@ -311,10 +311,13 @@ def _unstack_multiple(data, clocs):
recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels,
xnull=False)

dummy_index = MultiIndex(levels=rlevels + [obs_ids],
labels=rlabels + [comp_ids],
names=rnames + ['__placeholder__'],
verify_integrity=False)
if rlocs == []:
dummy_index = Index(obs_ids, name='__placeholder__')
else:
dummy_index = MultiIndex(levels=rlevels + [obs_ids],
labels=rlabels + [comp_ids],
names=rnames + ['__placeholder__'],
verify_integrity=False)

if isinstance(data, Series):
dummy = data.copy()
Expand Down Expand Up @@ -446,7 +449,10 @@ def _slow_pivot(index, columns, values):

def unstack(obj, level, fill_value=None):
if isinstance(level, (tuple, list)):
return _unstack_multiple(obj, level)
if len(level) == 1:
level = level[0]
else:
return _unstack_multiple(obj, level)

if isinstance(obj, DataFrame):
if isinstance(obj.index, MultiIndex):
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/sparse/scipy_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,11 @@ def robust_get_level_values(i):
labels_to_i = Series(labels_to_i)
if len(subset) > 1:
labels_to_i.index = MultiIndex.from_tuples(labels_to_i.index)
labels_to_i.index.names = [index.names[i] for i in subset]
labels_to_i.index.names = [index.names[i] for i in subset]
else:
labels_to_i.index = Index(x[0] for x in labels_to_i.index)
labels_to_i.index.name = index.names[subset[0]]

labels_to_i.name = 'value'
return (labels_to_i)

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1452,7 +1452,10 @@ def cons_row(x):

if expand:
result = list(result)
return MultiIndex.from_tuples(result, names=name)
out = MultiIndex.from_tuples(result, names=name)
if out.nlevels == 1:
out = out.get_level_values(0)
return out
else:
return Index(result, name=name)
else:
Expand Down
24 changes: 19 additions & 5 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1444,7 +1444,14 @@ def _agg_index(self, index, try_parse_dates=True):
arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues)
arrays.append(arr)

index = MultiIndex.from_arrays(arrays, names=self.index_names)
if len(arrays) > 1:
index = MultiIndex.from_arrays(arrays, names=self.index_names)
else:
if self.index_names is None:
name = None
else:
name = self.index_names[0]
index = Index(arrays[0], name=name)

return index

Expand Down Expand Up @@ -1808,7 +1815,10 @@ def read(self, nrows=None):
try_parse_dates=True)
arrays.append(values)

index = MultiIndex.from_arrays(arrays)
if self._reader.leading_cols == 1:
index = Index(arrays[0])
else:
index = MultiIndex.from_arrays(arrays)

if self.usecols is not None:
names = self._filter_usecols(names)
Expand Down Expand Up @@ -3138,9 +3148,13 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None):
if index_col is None or index_col is False:
index = Index([])
else:
index = [Series([], dtype=dtype[index_name])
for index_name in index_names]
index = MultiIndex.from_arrays(index, names=index_names)
if len(index_names) == 1:
name = index_names[0]
index = Index(Series([], dtype=dtype[name], name=name))
else:
index = [Series([], dtype=dtype[index_name])
for index_name in index_names]
index = MultiIndex.from_arrays(index, names=index_names)
index_col.sort()
for i, n in enumerate(index_col):
columns.pop(n - i)
Expand Down
20 changes: 9 additions & 11 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,15 +537,12 @@ def test_astype(self):
self.index.astype(np.dtype(int))

def test_constructor_single_level(self):
single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
labels=[[0, 1, 2, 3]], names=['first'])
assert isinstance(single_level, Index)
assert not isinstance(single_level, MultiIndex)
assert single_level.name == 'first'

single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
labels=[[0, 1, 2, 3]])
assert single_level.name is None
result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
labels=[[0, 1, 2, 3]], names=['first'])
assert isinstance(result, MultiIndex)
expected = Index(['foo', 'bar', 'baz', 'qux'], name='first')
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ['first']

def test_constructor_no_levels(self):
tm.assert_raises_regex(ValueError, "non-zero number "
Expand Down Expand Up @@ -768,8 +765,9 @@ def test_from_arrays_empty(self):

# 1 level
result = MultiIndex.from_arrays(arrays=[[]], names=['A'])
assert isinstance(result, MultiIndex)
expected = Index([], name='A')
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result.levels[0], expected)

# N levels
for N in [2, 3]:
Expand Down Expand Up @@ -830,7 +828,7 @@ def test_from_product_empty(self):
# 1 level
result = MultiIndex.from_product([[]], names=['A'])
expected = pd.Index([], name='A')
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result.levels[0], expected)

# 2 levels
l1 = [[], ['foo', 'bar', 'baz'], []]
Expand Down
3 changes: 3 additions & 0 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1910,6 +1910,9 @@ def keyfunc(x):
# convert tuples to index
if nentries == 1:
index = Index(tuples[0], name=names[0])
elif nlevels == 1:
name = None if names is None else names[0]
index = Index((x[0] for x in tuples), name=name)
else:
index = MultiIndex.from_tuples(tuples, names=names)
return index
Expand Down

0 comments on commit be17791

Please sign in to comment.