diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index ad190b9ed17..a85e7d737dc 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -663,30 +663,35 @@ def test_groupby_dataset_reduce() -> None:
     assert_allclose(expected, actual)
 
 
-def test_groupby_dataset_math() -> None:
+@pytest.mark.parametrize("squeeze", [True, False])
+def test_groupby_dataset_math(squeeze) -> None:
+    """Check groupby("dim1") arithmetic against plain Dataset arithmetic."""
+
     def reorder_dims(x):
         return x.transpose("dim1", "dim2", "dim3", "time")
 
     ds = create_test_data()
     ds["dim1"] = ds["dim1"]
-    for squeeze in [True, False]:
-        grouped = ds.groupby("dim1", squeeze=squeeze)
+    grouped = ds.groupby("dim1", squeeze=squeeze)
 
-        expected = reorder_dims(ds + ds.coords["dim1"])
-        actual = grouped + ds.coords["dim1"]
-        assert_identical(expected, reorder_dims(actual))
+    expected = reorder_dims(ds + ds.coords["dim1"])
+    actual = grouped + ds.coords["dim1"]
+    assert_identical(expected, reorder_dims(actual))
 
-        actual = ds.coords["dim1"] + grouped
-        assert_identical(expected, reorder_dims(actual))
+    actual = ds.coords["dim1"] + grouped
+    assert_identical(expected, reorder_dims(actual))
 
-        ds2 = 2 * ds
-        expected = reorder_dims(ds + ds2)
-        actual = grouped + ds2
-        assert_identical(expected, reorder_dims(actual))
+    ds2 = 2 * ds
+    expected = reorder_dims(ds + ds2)
+    actual = grouped + ds2
+    assert_identical(expected, reorder_dims(actual))
 
-        actual = ds2 + grouped
-        assert_identical(expected, reorder_dims(actual))
+    actual = ds2 + grouped
+    assert_identical(expected, reorder_dims(actual))
 
+
+def test_groupby_math_more() -> None:
+    ds = create_test_data()
     grouped = ds.groupby("numbers")
     zeros = DataArray([0, 0, 0, 0], [("numbers", range(4))])
     expected = (ds + Variable("dim3", np.zeros(10))).transpose(
@@ -719,6 +724,65 @@ def reorder_dims(x):
         ds + ds.groupby("time.month")
 
 
+@pytest.mark.parametrize("indexed_coord", [True, False])
+def test_groupby_bins_math(indexed_coord) -> None:
+    """Check a ``groupby_bins`` object minus its mean against a manual expansion."""
+    N = 7
+    da = DataArray(np.random.random((N, N)), dims=("x", "y"))
+    if indexed_coord:
+        da["x"] = np.arange(N)
+        da["y"] = np.arange(N)
+    g = da.groupby_bins("x", np.arange(0, N + 1, 3))
+    mean = g.mean()
+    # The expected value skips x=0 and pairs the remaining six x positions with
+    # bin means 0, 0, 0, 1, 1, 1.
+    expected = da.isel(x=slice(1, None)) - mean.isel(x_bins=("x", [0, 0, 0, 1, 1, 1]))
+    actual = g - mean
+    assert_identical(expected, actual)
+
+
+def test_groupby_math_nD_group() -> None:
+    """Check groupby arithmetic when the grouping variable is two-dimensional."""
+    N = 40
+    da = DataArray(
+        np.random.random((N, N)),
+        dims=("x", "y"),
+        coords={
+            "labels": (
+                "x",
+                np.repeat(["a", "b", "c", "d", "e", "f", "g", "h"], repeats=N // 8),
+            ),
+        },
+    )
+    da["labels2d"] = xr.broadcast(da.labels, da)[0]
+
+    g = da.groupby("labels2d")
+    mean = g.mean()
+    expected = da - mean.sel(labels2d=da.labels2d)
+    expected["labels"] = expected.labels.broadcast_like(expected.labels2d)
+    actual = g - mean
+    assert_identical(expected, actual)
+
+    da["num"] = (
+        "x",
+        np.repeat([1, 2, 3, 4, 5, 6, 7, 8], repeats=N // 8),
+    )
+    da["num2d"] = xr.broadcast(da.num, da)[0]
+    g = da.groupby_bins("num2d", bins=[0, 4, 6])
+    mean = g.mean()
+    # Zero-based bin index for each element of the first 30 rows.
+    idxr = np.digitize(da.num2d, bins=(0, 4, 6), right=True)[:30, :] - 1
+    # Remove the bin-label coordinate before the vectorized isel; it is
+    # re-attached to ``expected`` below with ("x", "y") dims.
+    expanded_mean = mean.drop_vars("num2d_bins").isel(num2d_bins=(("x", "y"), idxr))
+    expected = da.isel(x=slice(30)) - expanded_mean
+    expected["labels"] = expected.labels.broadcast_like(expected.labels2d)
+    expected["num"] = expected.num.broadcast_like(expected.num2d)
+    expected["num2d_bins"] = (("x", "y"), mean.num2d_bins.data[idxr])
+    actual = g - mean
+    assert_identical(expected, actual)
+
+
 def test_groupby_dataset_math_virtual() -> None:
     ds = Dataset({"x": ("t", [1, 2, 3])}, {"t": pd.date_range("20100101", periods=3)})
     grouped = ds.groupby("t.day")