@@ -18,7 +18,7 @@ def guess_iteration_block_size(x, dimension, memory: int = 10000000) -> int:
18
18
return choose_block_size_for_1d_iteration (x , dimension , memory )
19
19
20
20
21
- def choose_block_size_for_1d_iteration (x , dimension : int , memory : int = 10000000 ) -> int :
21
+ def choose_block_size_for_1d_iteration (x , dimension : int , buffer_size : int = 10000000 ) -> int :
22
22
"""
23
23
Choose a block size for iterating over an array on a certain dimension,
24
24
see `~apply_over_dimension` for more details.
@@ -28,7 +28,9 @@ def choose_block_size_for_1d_iteration(x, dimension: int, memory: int = 10000000
28
28
29
29
dimension: Dimension to iterate over.
30
30
31
- memory: Available memory in bytes, to hold a single block in memory.
31
+ buffer_size:
32
+ Buffer size in bytes, to hold a single block per iteration. Larger
33
+ values generally improve speed at the cost of memory.
32
34
33
35
Returns:
34
36
Size of the block on the iteration dimension. This is guaranteed to be
@@ -46,7 +48,7 @@ def choose_block_size_for_1d_iteration(x, dimension: int, memory: int = 10000000
46
48
if i != dimension :
47
49
prod_other *= s
48
50
49
- num_elements = memory / x .dtype .itemsize
51
+ num_elements = buffer_size / x .dtype .itemsize
50
52
ideal = int (num_elements / prod_other )
51
53
if ideal == 0 :
52
54
return 1
@@ -59,7 +61,7 @@ def choose_block_size_for_1d_iteration(x, dimension: int, memory: int = 10000000
59
61
return int (ideal / curdim ) * curdim
60
62
61
63
62
- def apply_over_dimension (x , dimension : int , fun : Callable , block_size : Optional [int ] = None , allow_sparse : bool = False ) -> list :
64
+ def apply_over_dimension (x , dimension : int , fun : Callable , block_size : Optional [int ] = None , allow_sparse : bool = False , buffer_size : int = 1e8 ) -> list :
63
65
"""
64
66
Iterate over an array on a certain dimension. At each iteration, the block
65
67
of observations consists of the full extent of all dimensions other than
@@ -87,11 +89,16 @@ def apply_over_dimension(x, dimension: int, fun: Callable, block_size: Optional[
87
89
``x`` contains a sparse array, the block contents are instead
88
90
represented by a :py:class:`~SparseNdarray.SparseNdarray`.
89
91
92
+ buffer_size:
93
+ Buffer size in bytes, to hold a single block per iteration. Larger
94
+ values generally improve speed at the cost of memory. Only used
95
+ if ``block_size`` is not provided.
96
+
90
97
Returns:
91
98
List containing the output of ``fun`` on each block.
92
99
"""
93
100
if block_size is None :
94
- block_size = choose_block_size_for_1d_iteration (x , dimension )
101
+ block_size = choose_block_size_for_1d_iteration (x , dimension , buffer_size = buffer_size )
95
102
96
103
limit = x .shape [dimension ]
97
104
tasks = math .ceil (limit / block_size )
0 commit comments