diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index dcf69dfda1ae8..fd156ccfc8b31 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -297,19 +297,10 @@ def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T if obj.ndim == 2: kwargs[k] = obj[[i]] - # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no - # attribute "tz" - if hasattr(arr, "tz") and arr.tz is None: # type: ignore[union-attr] - # DatetimeArray needs to be converted to ndarray for DatetimeLikeBlock - - # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no - # attribute "_data" - arr = arr._data # type: ignore[union-attr] - elif arr.dtype.kind == "m" and not isinstance(arr, np.ndarray): - # TimedeltaArray needs to be converted to ndarray for TimedeltaBlock - - # error: "ExtensionArray" has no attribute "_data" - arr = arr._data # type: ignore[attr-defined] + if isinstance(arr.dtype, np.dtype) and not isinstance(arr, np.ndarray): + # i.e. TimedeltaArray, DatetimeArray with tz=None. Need to + # convert for the Block constructors. + arr = np.asarray(arr) if self.ndim == 2: arr = ensure_block_shape(arr, 2) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 53d59c78b40cc..a5da960427fe7 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -682,62 +682,9 @@ def test_view(self, data): class TestBaseMissing(base.BaseMissingTests): - def test_fillna_limit_pad(self, data_missing, using_array_manager, request): - if using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) - super().test_fillna_limit_pad(data_missing) - - def test_fillna_limit_backfill(self, data_missing, using_array_manager, request): - if using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) - super().test_fillna_limit_backfill(data_missing) - - def test_fillna_series(self, data_missing, using_array_manager, request): - if using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) - super().test_fillna_series(data_missing) - - def test_fillna_series_method( - self, data_missing, fillna_method, using_array_manager, request - ): - if using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) - super().test_fillna_series_method(data_missing, fillna_method) - - def test_fillna_frame(self, data_missing, using_array_manager, request): - if using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) - super().test_fillna_frame(data_missing) + @pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning") + def test_dropna_array(self, data_missing): + super().test_dropna_array(data_missing) class TestBasePrinting(base.BasePrintingTests): @@ -947,7 +894,7 @@ def test_setitem_scalar_series(self, data, box_in_series, request): ) super().test_setitem_scalar_series(data, box_in_series) - def test_setitem_sequence(self, data, box_in_series, using_array_manager, request): + def test_setitem_sequence(self, data, box_in_series, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -955,47 +902,9 @@ def test_setitem_sequence(self, data, box_in_series, using_array_manager, reques reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and box_in_series - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_sequence(data, box_in_series) - def test_setitem_sequence_mismatched_length_raises( - self, data, as_array, using_array_manager, request - ): - if using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) - super().test_setitem_sequence_mismatched_length_raises(data, as_array) - - def test_setitem_empty_indexer( - self, data, box_in_series, using_array_manager, request - ): - if ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and box_in_series - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) - super().test_setitem_empty_indexer(data, box_in_series) - - def test_setitem_sequence_broadcasts( - self, data, box_in_series, using_array_manager, request - ): + def test_setitem_sequence_broadcasts(self, data, box_in_series, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1003,20 +912,10 @@ def test_setitem_sequence_broadcasts( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and box_in_series - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_sequence_broadcasts(data, box_in_series) @pytest.mark.parametrize("setter", ["loc", "iloc"]) - def test_setitem_scalar(self, data, setter, using_array_manager, request): + def test_setitem_scalar(self, data, setter, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1024,15 +923,9 @@ def test_setitem_scalar(self, data, setter, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_scalar(data, setter) - def test_setitem_loc_scalar_mixed(self, data, using_array_manager, request): + def test_setitem_loc_scalar_mixed(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1040,15 +933,9 @@ def test_setitem_loc_scalar_mixed(self, data, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_loc_scalar_mixed(data) - def test_setitem_loc_scalar_single(self, data, using_array_manager, request): + def test_setitem_loc_scalar_single(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1056,17 +943,9 @@ def test_setitem_loc_scalar_single(self, data, using_array_manager, request): reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_loc_scalar_single(data) - def test_setitem_loc_scalar_multiple_homogoneous( - self, data, using_array_manager, request - ): + def test_setitem_loc_scalar_multiple_homogoneous(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1074,15 +953,9 @@ def test_setitem_loc_scalar_multiple_homogoneous( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_loc_scalar_multiple_homogoneous(data) - def test_setitem_iloc_scalar_mixed(self, data, using_array_manager, request): + def test_setitem_iloc_scalar_mixed(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1090,15 +963,9 @@ def test_setitem_iloc_scalar_mixed(self, data, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_iloc_scalar_mixed(data) - def test_setitem_iloc_scalar_single(self, data, using_array_manager, request): + def test_setitem_iloc_scalar_single(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1106,17 +973,9 @@ def test_setitem_iloc_scalar_single(self, data, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_iloc_scalar_single(data) - def test_setitem_iloc_scalar_multiple_homogoneous( - self, data, using_array_manager, request - ): + def test_setitem_iloc_scalar_multiple_homogoneous(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1124,12 +983,6 @@ def test_setitem_iloc_scalar_multiple_homogoneous( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_iloc_scalar_multiple_homogoneous(data) @pytest.mark.parametrize( @@ -1141,9 +994,7 @@ def test_setitem_iloc_scalar_multiple_homogoneous( ], ids=["numpy-array", "boolean-array", "boolean-array-na"], ) - def test_setitem_mask( - self, data, mask, box_in_series, using_array_manager, request - ): + def test_setitem_mask(self, data, mask, box_in_series, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1151,21 +1002,9 @@ def test_setitem_mask( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and box_in_series - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_mask(data, mask, box_in_series) - def test_setitem_mask_boolean_array_with_na( - self, data, box_in_series, using_array_manager, request - ): + def test_setitem_mask_boolean_array_with_na(self, data, box_in_series, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) unit = getattr(data.dtype.pyarrow_dtype, "unit", None) if pa_version_under2p0 and tz not in (None, "UTC") and unit == "us": @@ -1174,16 +1013,6 @@ def test_setitem_mask_boolean_array_with_na( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and box_in_series - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_mask_boolean_array_with_na(data, box_in_series) @pytest.mark.parametrize( @@ -1191,9 +1020,7 @@ def test_setitem_mask_boolean_array_with_na( [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], ids=["list", "integer-array", "numpy-array"], ) - def test_setitem_integer_array( - self, data, idx, box_in_series, using_array_manager, request - ): + def test_setitem_integer_array(self, data, idx, box_in_series, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1201,23 +1028,11 @@ def test_setitem_integer_array( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and box_in_series - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_integer_array(data, idx, box_in_series) @pytest.mark.parametrize("as_callable", [True, False]) @pytest.mark.parametrize("setter", ["loc", None]) - def test_setitem_mask_aligned( - self, data, as_callable, setter, using_array_manager, request - ): + def test_setitem_mask_aligned(self, data, as_callable, setter, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1225,16 +1040,10 @@ def test_setitem_mask_aligned( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_mask_aligned(data, as_callable, setter) @pytest.mark.parametrize("setter", ["loc", None]) - def test_setitem_mask_broadcast(self, data, setter, using_array_manager, request): + def test_setitem_mask_broadcast(self, data, setter, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1242,12 +1051,6 @@ def test_setitem_mask_broadcast(self, data, setter, using_array_manager, request reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_mask_broadcast(data, setter) def test_setitem_tuple_index(self, data, request): @@ -1260,7 +1063,7 @@ def test_setitem_tuple_index(self, data, request): ) super().test_setitem_tuple_index(data) - def test_setitem_slice(self, data, box_in_series, using_array_manager, request): + def test_setitem_slice(self, data, box_in_series, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1268,19 +1071,9 @@ def test_setitem_slice(self, data, box_in_series, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and box_in_series - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_slice(data, box_in_series) - def test_setitem_loc_iloc_slice(self, data, using_array_manager, request): + def test_setitem_loc_iloc_slice(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1288,12 +1081,6 @@ def test_setitem_loc_iloc_slice(self, data, using_array_manager, request): reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_loc_iloc_slice(data) def test_setitem_slice_array(self, data, request): @@ -1306,9 +1093,7 @@ def test_setitem_slice_array(self, data, request): ) super().test_setitem_slice_array(data) - def test_setitem_with_expansion_dataframe_column( - self, data, full_indexer, using_array_manager, request - ): + def test_setitem_with_expansion_dataframe_column(self, data, full_indexer, request): # Is there a better way to get the full_indexer id "null_slice"? is_null_slice = "null_slice" in request.node.nodeid tz = getattr(data.dtype.pyarrow_dtype, "tz", None) @@ -1318,21 +1103,9 @@ def test_setitem_with_expansion_dataframe_column( reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" ) ) - elif ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and not is_null_slice - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_with_expansion_dataframe_column(data, full_indexer) - def test_setitem_with_expansion_row( - self, data, na_value, using_array_manager, request - ): + def test_setitem_with_expansion_row(self, data, na_value, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1340,15 +1113,9 @@ def test_setitem_with_expansion_row( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_with_expansion_row(data, na_value) - def test_setitem_frame_2d_values(self, data, using_array_manager, request): + def test_setitem_frame_2d_values(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1356,12 +1123,6 @@ def test_setitem_frame_2d_values(self, data, using_array_manager, request): reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_frame_2d_values(data) @pytest.mark.xfail(reason="GH 45419: pyarrow.ChunkedArray does not support views") @@ -1586,26 +1347,6 @@ def test_factorize_empty(self, data, request): ) super().test_factorize_empty(data) - def test_fillna_copy_frame(self, data_missing, request, using_array_manager): - pa_dtype = data_missing.dtype.pyarrow_dtype - if using_array_manager and pa.types.is_duration(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Checking ndim when using arraymanager with {pa_dtype}" - ) - ) - super().test_fillna_copy_frame(data_missing) - - def test_fillna_copy_series(self, data_missing, request, using_array_manager): - pa_dtype = data_missing.dtype.pyarrow_dtype - if using_array_manager and pa.types.is_duration(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Checking ndim when using arraymanager with {pa_dtype}" - ) - ) - super().test_fillna_copy_series(data_missing) - def test_shift_fill_value(self, data, request): pa_dtype = data.dtype.pyarrow_dtype tz = getattr(pa_dtype, "tz", None) @@ -1643,16 +1384,10 @@ def test_insert(self, data, request): ) super().test_insert(data) - def test_combine_first(self, data, request, using_array_manager): + def test_combine_first(self, data, request): pa_dtype = data.dtype.pyarrow_dtype tz = getattr(pa_dtype, "tz", None) - if using_array_manager and pa.types.is_duration(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Checking ndim when using arraymanager with {pa_dtype}" - ) - ) - elif pa_version_under2p0 and tz not in (None, "UTC"): + if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( pytest.mark.xfail( reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" @@ -1660,30 +1395,6 @@ def test_combine_first(self, data, request, using_array_manager): ) super().test_combine_first(data) - @pytest.mark.parametrize("frame", [True, False]) - @pytest.mark.parametrize( - "periods, indices", - [(-2, [2, 3, 4, -1, -1]), (0, [0, 1, 2, 3, 4]), (2, [-1, -1, 0, 1, 2])], - ) - def test_container_shift( - self, data, frame, periods, indices, request, using_array_manager - ): - pa_dtype = data.dtype.pyarrow_dtype - if ( - using_array_manager - and pa.types.is_duration(pa_dtype) - and periods in (-2, 2) - ): - request.node.add_marker( - pytest.mark.xfail( - reason=( - f"Checking ndim when using arraymanager with " - f"{pa_dtype} and periods={periods}" - ) - ) - ) - super().test_container_shift(data, frame, periods, indices) - @pytest.mark.xfail( reason="result dtype pyarrow[bool] better than expected dtype object" ) @@ -1711,15 +1422,9 @@ def test_searchsorted(self, data_for_sorting, as_series, request): ) super().test_searchsorted(data_for_sorting, as_series) - def test_where_series(self, data, na_value, as_frame, request, using_array_manager): + def test_where_series(self, data, na_value, as_frame, request): pa_dtype = data.dtype.pyarrow_dtype - if using_array_manager and pa.types.is_duration(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Checking ndim when using arraymanager with {pa_dtype}" - ) - ) - elif pa.types.is_temporal(pa_dtype): + if pa.types.is_temporal(pa_dtype): request.node.add_marker( pytest.mark.xfail( raises=pa.ArrowNotImplementedError,