From d99d44a29194ba9487aa741297d41363d1d6f2e3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 15 Jul 2020 07:24:39 -0500 Subject: [PATCH] Revert "BUG: fix union_indexes not supporting sort=False for Index subclasses (#35098)" (#35277) This reverts commit c21be0562a33d149b62735fc82aff80e4d5942f5. --- doc/source/whatsnew/v1.1.0.rst | 1 - pandas/core/indexes/api.py | 8 +------- pandas/tests/frame/test_constructors.py | 6 ++---- pandas/tests/indexes/test_common.py | 18 +---------------- pandas/tests/reshape/test_concat.py | 14 ------------- pandas/tests/reshape/test_melt.py | 26 ++++++++++++------------- pandas/tests/test_strings.py | 9 +-------- 7 files changed, 18 insertions(+), 64 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 3faca9c8868ca..dc1e7523046d5 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1114,7 +1114,6 @@ Reshaping - Fixed bug in :func:`melt` where melting MultiIndex columns with ``col_level`` > 0 would raise a ``KeyError`` on ``id_vars`` (:issue:`34129`) - Bug in :meth:`Series.where` with an empty Series and empty ``cond`` having non-bool dtype (:issue:`34592`) - Fixed regression where :meth:`DataFrame.apply` would raise ``ValueError`` for elements whth ``S`` dtype (:issue:`34529`) -- Bug in :meth:`DataFrame.append` leading to sorting columns even when ``sort=False`` is specified (:issue:`35092`) Sparse ^^^^^^ diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 9849742abcfca..4c5a70f4088ee 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -214,13 +214,7 @@ def conv(i): return result.union_many(indexes[1:]) else: for other in indexes[1:]: - # GH 35092. Index.union expects sort=None instead of sort=True - # to signify that sort=True isn't fully implemented and - # legacy implementation sometimes might not sort (see GH 24959) - # In this case we currently sort in _get_combined_index - if sort: - sort = None - result = result.union(other, sort=sort) + result = result.union(other) return result elif kind == "array": index = indexes[0] diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 17ac2307b9da6..1631342c359c1 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2578,13 +2578,11 @@ def test_construct_with_two_categoricalindex_series(self): index=pd.CategoricalIndex(["f", "female", "m", "male", "unknown"]), ) result = DataFrame([s1, s2]) - # GH 35092. Extra s2 columns are now appended to s1 columns - # in original order expected = DataFrame( np.array( - [[39.0, 6.0, 4.0, np.nan, np.nan], [152.0, 242.0, 150.0, 2.0, 2.0]] + [[np.nan, 39.0, np.nan, 6.0, 4.0], [2.0, 152.0, 2.0, 242.0, 150.0]] ), - columns=["female", "male", "unknown", "f", "m"], + columns=["f", "female", "m", "male", "unknown"], ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index c85696e02ad39..02a173eb4958d 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -13,9 +13,8 @@ from pandas.core.dtypes.common import is_period_dtype, needs_i8_conversion import pandas as pd -from pandas import CategoricalIndex, Index, MultiIndex, RangeIndex +from pandas import CategoricalIndex, MultiIndex, RangeIndex import pandas._testing as tm -from pandas.core.indexes.api import union_indexes class TestCommon: @@ -396,18 +395,3 @@ def test_astype_preserves_name(self, index, dtype, copy): assert result.names == index.names else: assert result.name == index.name - - -@pytest.mark.parametrize("arr", [[0, 1, 4, 3]]) -@pytest.mark.parametrize("dtype", ["int8", "int16", "int32", "int64"]) -def test_union_index_no_sort(arr, sort, dtype): - # GH 35092. Check that we don't sort with sort=False - ind1 = Index(arr[:2], dtype=dtype) - ind2 = Index(arr[2:], dtype=dtype) - - # sort is None indicates that we sort the combined index - if sort is None: - arr.sort() - expected = Index(arr, dtype=dtype) - result = union_indexes([ind1, ind2], sort=sort) - tm.assert_index_equal(result, expected) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index ff95d8ad997a4..ffeb5ff0f8aaa 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2857,17 +2857,3 @@ def test_concat_frame_axis0_extension_dtypes(): result = pd.concat([df2, df1], ignore_index=True) expected = pd.DataFrame({"a": [4, 5, 6, 1, 2, 3]}, dtype="Int64") tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("sort", [True, False]) -def test_append_sort(sort): - # GH 35092. Check that DataFrame.append respects the sort argument. - df1 = pd.DataFrame(data={0: [1, 2], 1: [3, 4]}) - df2 = pd.DataFrame(data={3: [1, 2], 2: [3, 4]}) - cols = list(df1.columns) + list(df2.columns) - if sort: - cols.sort() - - result = df1.append(df2, sort=sort).columns - expected = type(result)(cols) - tm.assert_index_equal(result, expected) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 241721432bbf9..2b75a1ec6ca6e 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -732,11 +732,11 @@ def test_unbalanced(self): ) df["id"] = df.index exp_data = { - "X": ["X1", "X2", "X1", "X2"], - "A": [1.0, 2.0, 3.0, 4.0], - "B": [5.0, 6.0, np.nan, np.nan], - "id": [0, 1, 0, 1], - "year": [2010, 2010, 2011, 2011], + "X": ["X1", "X1", "X2", "X2"], + "A": [1.0, 3.0, 2.0, 4.0], + "B": [5.0, np.nan, 6.0, np.nan], + "id": [0, 0, 1, 1], + "year": [2010, 2011, 2010, 2011], } expected = pd.DataFrame(exp_data) expected = expected.set_index(["id", "year"])[["X", "A", "B"]] @@ -979,10 +979,10 @@ def test_nonnumeric_suffix(self): ) expected = pd.DataFrame( { - "A": ["X1", "X2", "X1", "X2"], - "colname": ["placebo", "placebo", "test", "test"], - "result": [5.0, 6.0, np.nan, np.nan], - "treatment": [1.0, 2.0, 3.0, 4.0], + "A": ["X1", "X1", "X2", "X2"], + "colname": ["placebo", "test", "placebo", "test"], + "result": [5.0, np.nan, 6.0, np.nan], + "treatment": [1.0, 3.0, 2.0, 4.0], } ) expected = expected.set_index(["A", "colname"]) @@ -1026,10 +1026,10 @@ def test_float_suffix(self): ) expected = pd.DataFrame( { - "A": ["X1", "X2", "X1", "X2", "X1", "X2", "X1", "X2"], - "colname": [1.2, 1.2, 1.0, 1.0, 1.1, 1.1, 2.1, 2.1], - "result": [5.0, 6.0, 0.0, 9.0, np.nan, np.nan, np.nan, np.nan], - "treatment": [np.nan, np.nan, np.nan, np.nan, 1.0, 2.0, 3.0, 4.0], + "A": ["X1", "X1", "X1", "X1", "X2", "X2", "X2", "X2"], + "colname": [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1], + "result": [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan], + "treatment": [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0], } ) expected = expected.set_index(["A", "colname"]) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 3a4e54052305e..d9396d70f9112 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -636,15 +636,8 @@ def test_str_cat_align_mixed_inputs(self, join): # mixed list of indexed/unindexed u = np.array(["A", "B", "C", "D"]) expected_outer = Series(["aaA", "bbB", "c-C", "ddD", "-e-"]) - # joint index of rhs [t, u]; u will be forced have index of s - # GH 35092. If right join, maintain order of t.index - if join == "inner": - rhs_idx = t.index & s.index - elif join == "right": - rhs_idx = t.index.union(s.index, sort=False) - else: - rhs_idx = t.index | s.index + rhs_idx = t.index & s.index if join == "inner" else t.index | s.index expected = expected_outer.loc[s.index.join(rhs_idx, how=join)] result = s.str.cat([t, u], join=join, na_rep="-")