diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index a269580bc4453..11195e448ed77 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -301,6 +301,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ +- Bug in :meth:`DataFrame.combine_first` that would convert a datetime-like column on the other :class:`DataFrame` to integer when the column is not present in the original :class:`DataFrame` (:issue:`28481`) - Bug in :attr:`DatetimeArray.date` where a ``ValueError`` would be raised with a read-only backing array (:issue:`33530`) - Bug in ``NaT`` comparisons failing to raise ``TypeError`` on invalid inequality comparisons (:issue:`35046`) - Bug in :class:`DateOffset` where attributes reconstructed from pickle files differ from original objects when input values exceed normal ranges (e.g months=12) (:issue:`34511`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1f9987d9d3f5b..4267a563fc50e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6169,7 +6169,7 @@ def combine( otherSeries = otherSeries.astype(new_dtype) arr = func(series, otherSeries) - arr = maybe_downcast_to_dtype(arr, this_dtype) + arr = maybe_downcast_to_dtype(arr, new_dtype) result[col] = arr diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 78f265d32f8df..6c1531d182767 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -103,6 +103,7 @@ def test_combine_first_mixed_bug(self): combined = frame1.combine_first(frame2) assert len(combined.columns) == 5 + def test_combine_first_same_as_in_update(self): # gh 3016 (same as in update) df = DataFrame( [[1.0, 2.0, False, True], [4.0, 5.0, True, False]], @@ -118,6 +119,7 @@ def test_combine_first_mixed_bug(self): df.loc[0, "A"] = 45 tm.assert_frame_equal(result, df) + def test_combine_first_doc_example(self): # doc example df1 = DataFrame( {"A": [1.0, np.nan, 
3.0, 5.0, np.nan], "B": [np.nan, 2.0, 3.0, np.nan, 6.0]} @@ -134,16 +136,23 @@ def test_combine_first_mixed_bug(self): expected = DataFrame({"A": [1, 2, 3, 5, 3, 7.0], "B": [np.nan, 2, 3, 4, 6, 8]}) tm.assert_frame_equal(result, expected) + def test_combine_first_return_obj_type_with_bools(self): # GH3552, return object dtype with bools df1 = DataFrame( [[np.nan, 3.0, True], [-4.6, np.nan, True], [np.nan, 7.0, False]] ) df2 = DataFrame([[-42.6, np.nan, True], [-5.0, 1.6, False]], index=[1, 2]) - result = df1.combine_first(df2)[2] - expected = Series([True, True, False], name=2) - tm.assert_series_equal(result, expected) + expected1 = pd.Series([True, True, False], name=2, dtype=object) + expected2 = pd.Series([True, True, False], name=2, dtype=object) + result1 = df1.combine_first(df2)[2] + result2 = df2.combine_first(df1)[2] + + tm.assert_series_equal(result1, expected1) + tm.assert_series_equal(result2, expected2) + + def test_combine_first_convert_datatime_correctly(self): # GH 3593, converting datetime64[ns] incorrectly df0 = DataFrame( {"a": [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]} @@ -339,9 +348,14 @@ def test_combine_first_int(self): df1 = pd.DataFrame({"a": [0, 1, 3, 5]}, dtype="int64") df2 = pd.DataFrame({"a": [1, 4]}, dtype="int64") - res = df1.combine_first(df2) - tm.assert_frame_equal(res, df1) - assert res["a"].dtype == "int64" + exp1 = pd.DataFrame({"a": [0, 1, 3, 5]}, dtype="float64") + exp2 = pd.DataFrame({"a": [1, 4, 3, 5]}, dtype="float64") + + res1 = df1.combine_first(df2) + res2 = df2.combine_first(df1) + + tm.assert_frame_equal(res1, exp1) + tm.assert_frame_equal(res2, exp2) @pytest.mark.parametrize("val", [1, 1.0]) def test_combine_first_with_asymmetric_other(self, val): @@ -353,3 +367,22 @@ def test_combine_first_with_asymmetric_other(self, val): exp = pd.DataFrame({"isBool": [True], "isNum": [val]}) tm.assert_frame_equal(res, exp) + + +@pytest.mark.parametrize( + "val1, val2", + [ + (datetime(2020, 1, 1), 
datetime(2020, 1, 2)), + (pd.Period("2020-01-01", "D"), pd.Period("2020-01-02", "D")), + (pd.Timedelta("89 days"), pd.Timedelta("60 min")), + ], +) +def test_combine_first_timestamp_bug(val1, val2, nulls_fixture): + + df1 = pd.DataFrame([[nulls_fixture, nulls_fixture]], columns=["a", "b"]) + df2 = pd.DataFrame([[val1, val2]], columns=["b", "c"]) + + res = df1.combine_first(df2) + exp = pd.DataFrame([[nulls_fixture, val1, val2]], columns=["a", "b", "c"]) + + tm.assert_frame_equal(res, exp)