@@ -672,7 +672,7 @@ def test_groupby_drops_nans(shuffle: bool, chunk: Literal[False] | dict) -> None
     ds["variable"] = ds["variable"].chunk(chunk)
     grouped = ds.groupby(ds.id)
     if shuffle:
-        grouped = grouped.shuffle()
+        grouped = grouped.distributed_shuffle()

     # non reduction operation
     expected1 = ds.copy()
@@ -1418,7 +1418,7 @@ def test_groupby_reductions(
     with raise_if_dask_computes():
         grouped = array.groupby("abc")
         if shuffle:
-            grouped = grouped.shuffle()
+            grouped = grouped.distributed_shuffle()

     with xr.set_options(use_flox=use_flox):
         actual = getattr(grouped, method)(dim="y")
@@ -1689,11 +1689,11 @@ def test_groupby_bins(
     gb = array.groupby_bins("dim_0", bins=bins, **cut_kwargs)
     actual = gb.sum()
     assert_identical(expected, actual)
-    assert_identical(expected, gb.shuffle().sum())
+    assert_identical(expected, gb.distributed_shuffle().sum())

     actual = gb.map(lambda x: x.sum())
     assert_identical(expected, actual)
-    assert_identical(expected, gb.shuffle().map(lambda x: x.sum()))
+    assert_identical(expected, gb.distributed_shuffle().map(lambda x: x.sum()))

     # make sure original array dims are unchanged
     assert len(array.dim_0) == 4
@@ -1880,14 +1880,14 @@ def resample_as_pandas(array, *args, **kwargs):
     actual = rs.mean()
     expected = resample_as_pandas(array, resample_freq)
     assert_identical(expected, actual)
-    assert_identical(expected, rs.shuffle().mean())
+    assert_identical(expected, rs.distributed_shuffle().mean())

     assert_identical(expected, rs.reduce(np.mean))
-    assert_identical(expected, rs.shuffle().reduce(np.mean))
+    assert_identical(expected, rs.distributed_shuffle().reduce(np.mean))

     rs = array.resample(time="24h", closed="right")
     actual = rs.mean()
-    shuffled = rs.shuffle().mean()
+    shuffled = rs.distributed_shuffle().mean()
     expected = resample_as_pandas(array, "24h", closed="right")
     assert_identical(expected, actual)
     assert_identical(expected, shuffled)
@@ -2832,7 +2832,7 @@ def test_multiple_groupers(use_flox: bool, shuffle: bool) -> None:

     gb = da.groupby(labels1=UniqueGrouper(), labels2=UniqueGrouper())
     if shuffle:
-        gb = gb.shuffle()
+        gb = gb.distributed_shuffle()
     repr(gb)

     expected = DataArray(
@@ -2853,7 +2853,7 @@ def test_multiple_groupers(use_flox: bool, shuffle: bool) -> None:
     square = DataArray(np.arange(16).reshape(4, 4), coords=coords, dims=["x", "y"])
     gb = square.groupby(a=UniqueGrouper(), b=UniqueGrouper())
     if shuffle:
-        gb = gb.shuffle()
+        gb = gb.distributed_shuffle()
     repr(gb)
     with xr.set_options(use_flox=use_flox):
         actual = gb.mean()
@@ -2878,14 +2878,14 @@ def test_multiple_groupers(use_flox: bool, shuffle: bool) -> None:
     )
     gb = b.groupby(x=UniqueGrouper(), y=UniqueGrouper())
     if shuffle:
-        gb = gb.shuffle()
+        gb = gb.distributed_shuffle()
     repr(gb)
     with xr.set_options(use_flox=use_flox):
         assert_identical(gb.mean("z"), b.mean("z"))

     gb = b.groupby(x=UniqueGrouper(), xy=UniqueGrouper())
     if shuffle:
-        gb = gb.shuffle()
+        gb = gb.distributed_shuffle()
     repr(gb)
     with xr.set_options(use_flox=use_flox):
         actual = gb.mean()
@@ -2939,7 +2939,7 @@ def test_multiple_groupers_mixed(use_flox: bool, shuffle: bool) -> None:
     )
     gb = ds.groupby(x=BinGrouper(bins=[5, 15, 25]), letters=UniqueGrouper())
     if shuffle:
-        gb = gb.shuffle()
+        gb = gb.distributed_shuffle()
     expected_data = np.array(
         [
             [[0.0, np.nan], [np.nan, 3.0]],
@@ -3176,12 +3176,12 @@ def test_shuffle_by_simple() -> None:
         data=dask.array.from_array([1, 2, 3, 4, 5, 6], chunks=2),
         coords={"label": ("x", "a b c a b c".split(" "))},
     )
-    actual = da.shuffle_by(label=UniqueGrouper())
+    actual = da.distributed_shuffle_by(label=UniqueGrouper())
     expected = da.isel(x=[0, 3, 1, 4, 2, 5])
     assert_identical(actual, expected)

     with pytest.raises(ValueError):
-        da.chunk(x=2, eagerly_load_group=False).shuffle_by("label")
+        da.chunk(x=2, eagerly_load_group=False).distributed_shuffle_by("label")


 @requires_dask
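
The call pattern is unchanged by the rename; only the method name differs. A minimal sketch of the renamed API as these tests exercise it (an illustration, not part of the diff; assumes a dask-backed DataArray and UniqueGrouper from xarray.groupers):

    import dask.array
    import numpy as np
    import xarray as xr
    from xarray.groupers import UniqueGrouper

    # A small dask-backed array with a group label along "x".
    da = xr.DataArray(
        dims="x",
        data=dask.array.from_array(np.arange(6), chunks=2),
        coords={"label": ("x", list("abcabc"))},
    )

    # distributed_shuffle() rechunks lazily so each group's members are
    # co-located in one chunk before the reduction runs.
    gb = da.groupby(label=UniqueGrouper()).distributed_shuffle()
    result = gb.sum()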