From 80b2b361059fcc5b6d2ffe36cb57f02749ca2884 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 17 Dec 2023 20:26:17 -0700 Subject: [PATCH] Reduce more warnings --- xarray/core/_aggregations.py | 132 ++++++++++---------- xarray/core/groupby.py | 180 ++++++++++++++++++++++++--- xarray/core/resample.py | 92 +++++++++++++- xarray/tests/test_groupby.py | 72 +++++++++-- xarray/util/generate_aggregations.py | 4 +- 5 files changed, 387 insertions(+), 93 deletions(-) diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index 89cec94e24f..ff8e62121f3 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -2424,7 +2424,7 @@ def count( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.count, dim=dim, numeric_only=False, @@ -2522,7 +2522,7 @@ def all( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.array_all, dim=dim, numeric_only=False, @@ -2620,7 +2620,7 @@ def any( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.array_any, dim=dim, numeric_only=False, @@ -2735,7 +2735,7 @@ def max( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.max, dim=dim, skipna=skipna, @@ -2851,7 +2851,7 @@ def min( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.min, dim=dim, skipna=skipna, @@ -2969,7 +2969,7 @@ def mean( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.mean, dim=dim, skipna=skipna, @@ -3105,7 +3105,7 @@ def prod( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -3242,7 +3242,7 @@ def sum( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -3376,7 +3376,7 @@ def std( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.std, dim=dim, skipna=skipna, @@ -3510,7 +3510,7 @@ def var( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.var, dim=dim, skipna=skipna, @@ -3614,7 +3614,7 @@ def median( Data variables: da (labels) float64 nan 2.0 1.5 """ - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.median, dim=dim, skipna=skipna, @@ -3715,7 +3715,7 @@ def cumsum( Data variables: da (time) float64 1.0 2.0 3.0 3.0 4.0 nan """ - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.cumsum, dim=dim, skipna=skipna, @@ -3816,7 +3816,7 @@ def cumprod( Data variables: da (time) float64 1.0 2.0 3.0 0.0 4.0 nan """ - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.cumprod, dim=dim, skipna=skipna, @@ -3938,7 +3938,7 @@ def count( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.count, dim=dim, numeric_only=False, @@ -4036,7 +4036,7 @@ def all( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.array_all, dim=dim, numeric_only=False, @@ -4134,7 +4134,7 @@ def any( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.array_any, dim=dim, numeric_only=False, @@ -4249,7 +4249,7 @@ def max( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.max, dim=dim, skipna=skipna, @@ -4365,7 +4365,7 @@ def min( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.min, dim=dim, skipna=skipna, @@ -4483,7 +4483,7 @@ def mean( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.mean, dim=dim, skipna=skipna, @@ -4619,7 +4619,7 @@ def prod( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -4756,7 +4756,7 @@ def sum( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -4890,7 +4890,7 @@ def std( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.std, dim=dim, skipna=skipna, @@ -5024,7 +5024,7 @@ def var( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.var, dim=dim, skipna=skipna, @@ -5112,23 +5112,23 @@ def median( >>> ds.resample(time="3M").median() - Dimensions: (time: 3) + Dimensions: (__resample_dim__: 3) Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + * __resample_dim__ (__resample_dim__) datetime64[ns] 2001-01-31 ... 2001-0... Data variables: - da (time) float64 1.0 2.0 2.0 + da (__resample_dim__) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").median(skipna=False) - Dimensions: (time: 3) + Dimensions: (__resample_dim__: 3) Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + * __resample_dim__ (__resample_dim__) datetime64[ns] 2001-01-31 ... 2001-0... Data variables: - da (time) float64 1.0 2.0 nan + da (__resample_dim__) float64 1.0 2.0 nan """ - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.median, dim=dim, skipna=skipna, @@ -5229,7 +5229,7 @@ def cumsum( Data variables: da (time) float64 1.0 2.0 5.0 5.0 2.0 nan """ - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.cumsum, dim=dim, skipna=skipna, @@ -5330,7 +5330,7 @@ def cumprod( Data variables: da (time) float64 1.0 2.0 6.0 0.0 2.0 nan """ - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.cumprod, dim=dim, skipna=skipna, @@ -5446,7 +5446,7 @@ def count( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.count, dim=dim, keep_attrs=keep_attrs, @@ -5537,7 +5537,7 @@ def all( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.array_all, dim=dim, keep_attrs=keep_attrs, @@ -5628,7 +5628,7 @@ def any( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.array_any, dim=dim, keep_attrs=keep_attrs, @@ -5734,7 +5734,7 @@ def max( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.max, dim=dim, skipna=skipna, @@ -5841,7 +5841,7 @@ def min( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.min, dim=dim, skipna=skipna, @@ -5950,7 +5950,7 @@ def mean( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.mean, dim=dim, skipna=skipna, @@ -6075,7 +6075,7 @@ def prod( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -6201,7 +6201,7 @@ def sum( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -6324,7 +6324,7 @@ def std( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.std, dim=dim, skipna=skipna, @@ -6447,7 +6447,7 @@ def var( **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.var, dim=dim, skipna=skipna, @@ -6543,7 +6543,7 @@ def median( Coordinates: * labels (labels) object 'a' 'b' 'c' """ - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.median, dim=dim, skipna=skipna, @@ -6640,7 +6640,7 @@ def cumsum( * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3M").median() - + array([1., 2., 2.]) Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + * __resample_dim__ (__resample_dim__) datetime64[ns] 2001-01-31 ... 2001-0... Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").median(skipna=False) - + array([ 1., 2., nan]) Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + * __resample_dim__ (__resample_dim__) datetime64[ns] 2001-01-31 ... 2001-0... """ - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.median, dim=dim, skipna=skipna, @@ -8034,8 +8034,8 @@ def cumsum( array([1., 2., 5., 5., 2., 2.]) Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) array([ 1., 2., 5., 5., 2., nan]) Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) array([1., 2., 6., 0., 2., 2.]) Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) array([ 1., 2., 6., 0., 2., nan]) Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) str: ", ".join(format_array_flat(grouper.full_index, 30).split()), ) - def _iter_grouped(self) -> Iterator[T_Xarray]: + def _iter_grouped(self, warn_squeeze=True) -> Iterator[T_Xarray]: """Iterate over each element in this group""" (grouper,) = self.groupers for idx, indices in enumerate(self._group_indices): indices = _maybe_squeeze_indices( - indices, self._squeeze, grouper, warn=idx == 0 + indices, self._squeeze, grouper, warn=warn_squeeze and idx == 0 ) yield self._obj.isel({self._group_dim: indices}) @@ -1356,7 +1357,7 @@ def dims(self) -> tuple[Hashable, ...]: return self._dims - def _iter_grouped_shortcut(self): + def _iter_grouped_shortcut(self, warn_squeeze=True): """Fast version of `_iter_grouped` that yields Variables without metadata """ @@ -1364,7 +1365,7 @@ def _iter_grouped_shortcut(self): (grouper,) = self.groupers for idx, indices in enumerate(self._group_indices): indices = _maybe_squeeze_indices( - indices, self._squeeze, grouper, warn=idx == 0 + indices, self._squeeze, grouper, warn=warn_squeeze and idx == 0 ) yield var[{self._group_dim: indices}] @@ -1445,7 +1446,24 @@ def map( applied : DataArray The result of splitting, applying and combining this array. """ - grouped = self._iter_grouped_shortcut() if shortcut else self._iter_grouped() + return self._map_maybe_warn( + func, args, warn_squeeze=True, shortcut=shortcut, **kwargs + ) + + def _map_maybe_warn( + self, + func: Callable[..., DataArray], + args: tuple[Any, ...] = (), + *, + warn_squeeze: bool = True, + shortcut: bool | None = None, + **kwargs: Any, + ) -> DataArray: + grouped = ( + self._iter_grouped_shortcut(warn_squeeze) + if shortcut + else self._iter_grouped(warn_squeeze) + ) applied = (maybe_wrap_array(arr, func(arr, *args, **kwargs)) for arr in grouped) return self._combine(applied, shortcut=shortcut) @@ -1547,6 +1565,68 @@ def reduce_array(ar: DataArray) -> DataArray: return self.map(reduce_array, shortcut=shortcut) + def _reduce_without_squeeze_warn( + self, + func: Callable[..., Any], + dim: Dims = None, + *, + axis: int | Sequence[int] | None = None, + keep_attrs: bool | None = None, + keepdims: bool = False, + shortcut: bool = True, + **kwargs: Any, + ) -> DataArray: + """Reduce the items in this group by applying `func` along some + dimension(s). + + Parameters + ---------- + func : callable + Function which can be called in the form + `func(x, axis=axis, **kwargs)` to return the result of collapsing + an np.ndarray over an integer valued axis. + dim : "...", str, Iterable of Hashable or None, optional + Dimension(s) over which to apply `func`. If None, apply over the + groupby dimension, if "..." apply over all dimensions. + axis : int or sequence of int, optional + Axis(es) over which to apply `func`. Only one of the 'dimension' + and 'axis' arguments can be supplied. If neither are supplied, then + `func` is calculated over all dimension for each group item. + keep_attrs : bool, optional + If True, the datasets's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to `func`. + + Returns + ------- + reduced : Array + Array with summarized data and the indicated dimension(s) + removed. + """ + if dim is None: + dim = [self._group_dim] + + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + + def reduce_array(ar: DataArray) -> DataArray: + return ar.reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="The `squeeze` kwarg") + check_reduce_dims(dim, self.dims) + + return self._map_maybe_warn(reduce_array, shortcut=shortcut, warn_squeeze=False) + # https://github.com/python/mypy/issues/9031 class DataArrayGroupBy( # type: ignore[misc] @@ -1611,8 +1691,18 @@ def map( applied : Dataset The result of splitting, applying and combining this dataset. """ + return self._map_maybe_warn(func, args, shortcut, warn_squeeze=True, **kwargs) + + def _map_maybe_warn( + self, + func: Callable[..., Dataset], + args: tuple[Any, ...] = (), + shortcut: bool | None = None, + warn_squeeze: bool = False, + **kwargs: Any, + ) -> Dataset: # ignore shortcut if set (for now) - applied = (func(ds, *args, **kwargs) for ds in self._iter_grouped()) + applied = (func(ds, *args, **kwargs) for ds in self._iter_grouped(warn_squeeze)) return self._combine(applied) def apply(self, func, args=(), shortcut=None, **kwargs): @@ -1707,6 +1797,68 @@ def reduce_dataset(ds: Dataset) -> Dataset: return self.map(reduce_dataset) + def _reduce_without_squeeze_warn( + self, + func: Callable[..., Any], + dim: Dims = None, + *, + axis: int | Sequence[int] | None = None, + keep_attrs: bool | None = None, + keepdims: bool = False, + shortcut: bool = True, + **kwargs: Any, + ) -> Dataset: + """Reduce the items in this group by applying `func` along some + dimension(s). + + Parameters + ---------- + func : callable + Function which can be called in the form + `func(x, axis=axis, **kwargs)` to return the result of collapsing + an np.ndarray over an integer valued axis. + dim : ..., str, Iterable of Hashable or None, optional + Dimension(s) over which to apply `func`. By default apply over the + groupby dimension, with "..." apply over all dimensions. + axis : int or sequence of int, optional + Axis(es) over which to apply `func`. Only one of the 'dimension' + and 'axis' arguments can be supplied. If neither are supplied, then + `func` is calculated over all dimension for each group item. + keep_attrs : bool, optional + If True, the datasets's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to `func`. + + Returns + ------- + reduced : Dataset + Array with summarized data and the indicated dimension(s) + removed. + """ + if dim is None: + dim = [self._group_dim] + + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + + def reduce_dataset(ds: Dataset) -> Dataset: + return ds.reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="The `squeeze` kwarg") + check_reduce_dims(dim, self.dims) + + return self._map_maybe_warn(reduce_dataset, warn_squeeze=False) + def assign(self, **kwargs: Any) -> Dataset: """Assign data variables by group. diff --git a/xarray/core/resample.py b/xarray/core/resample.py index c93faa31612..3bb158acfdb 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -188,6 +188,51 @@ class DataArrayResample(Resample["DataArray"], DataArrayGroupByBase, DataArrayRe specified dimension """ + def reduce( + self, + func: Callable[..., Any], + dim: Dims = None, + *, + axis: int | Sequence[int] | None = None, + keep_attrs: bool | None = None, + keepdims: bool = False, + shortcut: bool = True, + **kwargs: Any, + ) -> DataArray: + """Reduce the items in this group by applying `func` along the + pre-defined resampling dimension. + + Parameters + ---------- + func : callable + Function which can be called in the form + `func(x, axis=axis, **kwargs)` to return the result of collapsing + an np.ndarray over an integer valued axis. + dim : "...", str, Iterable of Hashable or None, optional + Dimension(s) over which to apply `func`. + keep_attrs : bool, optional + If True, the datasets's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to `func`. + + Returns + ------- + reduced : DataArray + Array with summarized data and the indicated dimension(s) + removed. + """ + return super().reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + shortcut=shortcut, + **kwargs, + ) + def map( self, func: Callable[..., Any], @@ -236,9 +281,21 @@ def map( applied : DataArray The result of splitting, applying and combining this array. """ + return self._map_maybe_warn(func, args, shortcut, warn_squeeze=True, **kwargs) + + def _map_maybe_warn( + self, + func: Callable[..., Any], + args: tuple[Any, ...] = (), + shortcut: bool | None = False, + warn_squeeze: bool = True, + **kwargs: Any, + ) -> DataArray: # TODO: the argument order for Resample doesn't match that for its parent, # GroupBy - combined = super().map(func, shortcut=shortcut, args=args, **kwargs) + combined = super()._map_maybe_warn( + func, shortcut=shortcut, args=args, warn_squeeze=warn_squeeze, **kwargs + ) # If the aggregation function didn't drop the original resampling # dimension, then we need to do so before we can rename the proxy @@ -318,8 +375,18 @@ def map( applied : Dataset The result of splitting, applying and combining this dataset. """ + return self._map_maybe_warn(func, args, shortcut, warn_squeeze=True, **kwargs) + + def _map_maybe_warn( + self, + func: Callable[..., Any], + args: tuple[Any, ...] = (), + shortcut: bool | None = None, + warn_squeeze: bool = True, + **kwargs: Any, + ) -> Dataset: # ignore shortcut if set (for now) - applied = (func(ds, *args, **kwargs) for ds in self._iter_grouped()) + applied = (func(ds, *args, **kwargs) for ds in self._iter_grouped(warn_squeeze)) combined = self._combine(applied) # If the aggregation function didn't drop the original resampling @@ -394,6 +461,27 @@ def reduce( **kwargs, ) + def _reduce_without_squeeze_warn( + self, + func: Callable[..., Any], + dim: Dims = None, + *, + axis: int | Sequence[int] | None = None, + keep_attrs: bool | None = None, + keepdims: bool = False, + shortcut: bool = True, + **kwargs: Any, + ) -> Dataset: + return super()._reduce_without_squeeze_warn( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + shortcut=shortcut, + **kwargs, + ) + def asfreq(self) -> Dataset: """Return values of original object at the new up-sampling frequency; essentially a re-index with new times set to NaN. diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 225b24d0aac..cdb4969a48e 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -67,21 +67,30 @@ def test_groupby_dims_property(dataset, recwarn) -> None: assert dataset.groupby("y").dims == dataset.isel(y=1).dims # when squeeze=False, no warning should be raised - assert dataset.groupby("x", squeeze=False).dims == dataset.isel(x=slice(1, 2)).dims - assert dataset.groupby("y", squeeze=False).dims == dataset.isel(y=slice(1, 2)).dims + assert tuple(dataset.groupby("x", squeeze=False).dims) == tuple( + dataset.isel(x=slice(1, 2)).dims + ) + assert tuple(dataset.groupby("y", squeeze=False).dims) == tuple( + dataset.isel(y=slice(1, 2)).dims + ) assert len(recwarn) == 0 stacked = dataset.stack({"xy": ("x", "y")}) - assert stacked.groupby("xy", squeeze=False).dims == stacked.isel(xy=[0]).dims + assert tuple(stacked.groupby("xy", squeeze=False).dims) == tuple( + stacked.isel(xy=[0]).dims + ) assert len(recwarn) == 0 def test_groupby_sizes_property(dataset) -> None: - assert dataset.groupby("x").sizes == dataset.isel(x=1).sizes - assert dataset.groupby("y").sizes == dataset.isel(y=1).sizes + with pytest.warns(UserWarning, match="The `squeeze` kwarg"): + assert dataset.groupby("x").sizes == dataset.isel(x=1).sizes + with pytest.warns(UserWarning, match="The `squeeze` kwarg"): + assert dataset.groupby("y").sizes == dataset.isel(y=1).sizes stacked = dataset.stack({"xy": ("x", "y")}) - assert stacked.groupby("xy").sizes == stacked.isel(xy=0).sizes + with pytest.warns(UserWarning, match="The `squeeze` kwarg"): + assert stacked.groupby("xy").sizes == stacked.isel(xy=0).sizes def test_multi_index_groupby_map(dataset) -> None: @@ -490,8 +499,10 @@ def test_da_groupby_assign_coords() -> None: actual = xr.DataArray( [[3, 4, 5], [6, 7, 8]], dims=["y", "x"], coords={"y": range(2), "x": range(3)} ) - actual1 = actual.groupby("x", squeeze=False).assign_coords({"y": [-1, -2]}) - actual2 = actual.groupby("x", squeeze=False).assign_coords(y=[-1, -2]) + with pytest.warns(UserWarning, match="The `squeeze` kwarg"): + actual1 = actual.groupby("x").assign_coords({"y": [-1, -2]}) + with pytest.warns(UserWarning, match="The `squeeze` kwarg"): + actual2 = actual.groupby("x").assign_coords(y=[-1, -2]) expected = xr.DataArray( [[3, 4, 5], [6, 7, 8]], dims=["y", "x"], coords={"y": [-1, -2], "x": range(3)} ) @@ -639,13 +650,17 @@ def test_groupby_grouping_errors() -> None: def test_groupby_reduce_dimension_error(array) -> None: grouped = array.groupby("y") + # assert_identical(array, grouped.mean()) + with pytest.raises(ValueError, match=r"cannot reduce over dimensions"): grouped.mean("huh") with pytest.raises(ValueError, match=r"cannot reduce over dimensions"): grouped.mean(("x", "y", "asd")) - assert_identical(array, grouped.mean()) + with pytest.warns(UserWarning, match="The `squeeze` kwarg"): + assert_identical(array.mean("x"), grouped.reduce(np.mean, "x")) + assert_allclose(array.mean(["x", "z"]), grouped.reduce(np.mean, ["x", "z"])) grouped = array.groupby("y", squeeze=False) assert_identical(array, grouped.mean()) @@ -688,6 +703,15 @@ def test_groupby_none_group_name() -> None: def test_groupby_getitem(dataset) -> None: + with pytest.warns(UserWarning, match="The `squeeze` kwarg"): + assert_identical(dataset.sel(x="a"), dataset.groupby("x")["a"]) + with pytest.warns(UserWarning, match="The `squeeze` kwarg"): + assert_identical(dataset.sel(z=1), dataset.groupby("z")[1]) + with pytest.warns(UserWarning, match="The `squeeze` kwarg"): + assert_identical(dataset.foo.sel(x="a"), dataset.foo.groupby("x")["a"]) + with pytest.warns(UserWarning, match="The `squeeze` kwarg"): + assert_identical(dataset.foo.sel(z=1), dataset.foo.groupby("z")[1]) + assert_identical(dataset.sel(x=["a"]), dataset.groupby("x", squeeze=False)["a"]) assert_identical(dataset.sel(z=[1]), dataset.groupby("z", squeeze=False)[1]) @@ -729,6 +753,36 @@ def identity(x): assert_equal(data, actual2) +def test_groupby_dataset_squeeze_None() -> None: + """Delete when removing squeeze.""" + data = Dataset( + {"z": (["x", "y"], np.random.randn(3, 5))}, + {"x": ("x", list("abc")), "c": ("x", [0, 1, 0]), "y": range(5)}, + ) + groupby = data.groupby("x") + assert len(groupby) == 3 + expected_groups = {"a": 0, "b": 1, "c": 2} + with pytest.warns(UserWarning, match="The `squeeze` kwarg"): + assert groupby.groups == expected_groups + expected_items = [ + ("a", data.isel(x=0)), + ("b", data.isel(x=1)), + ("c", data.isel(x=2)), + ] + with pytest.warns(UserWarning, match="The `squeeze` kwarg"): + for actual1, expected1 in zip(groupby, expected_items): + assert actual1[0] == expected1[0] + assert_equal(actual1[1], expected1[1]) + + def identity(x): + return x + + with pytest.warns(UserWarning, match="The `squeeze` kwarg"): + for k in ["x", "c"]: + actual2 = data.groupby(k).map(identity) + assert_equal(data, actual2) + + def test_groupby_dataset_returns_new_type() -> None: data = Dataset({"z": (["x", "y"], np.random.randn(3, 5))}) diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index 0811b571757..5135cfd615e 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -429,7 +429,7 @@ def generate_code(self, method, has_keep_attrs): if method_is_not_flox_supported: return f"""\ - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.{method.array_method}, dim=dim,{extra_kwargs} keep_attrs=keep_attrs, @@ -451,7 +451,7 @@ def generate_code(self, method, has_keep_attrs): **kwargs, ) else: - return self.reduce( + return self._reduce_without_squeeze_warn( duck_array_ops.{method.array_method}, dim=dim,{extra_kwargs} keep_attrs=keep_attrs,