
Commit a3b1b1e

parallelize sets numba cores to 1 (#1008)
* init
* save working progress
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* finalize the tests
* finish tests and update docs
* fix pyproject
* fix serialization issue
* remove test_parallelize to check the new speed of tests
* add parallelize tests again
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* replace isolate
* use xdist

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 8b74755 commit a3b1b1e
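
For context, a minimal, self-contained sketch of the pattern this commit applies inside squidpy's parallelize helper: every joblib job is routed through a small wrapper that pins numba to a single thread before calling the actual worker, so the joblib backend alone decides how many cores are used. The names heavy_kernel and _pin_numba_and_run are illustrative, not squidpy API.

import joblib as jl
import numba
import numpy as np


@numba.njit(parallel=True)
def heavy_kernel(x):
    # A numba kernel that would otherwise fan out over all available threads.
    out = np.empty_like(x)
    for i in numba.prange(x.shape[0]):
        out[i] = x[i] * 2
    return out


def _pin_numba_and_run(fn, *args, **kwargs):
    # Mirrors _callback_wrapper from this commit: one numba thread per joblib job.
    numba.set_num_threads(1)
    return fn(*args, **kwargs)


if __name__ == "__main__":
    chunks = [np.arange(10_000) for _ in range(4)]
    results = jl.Parallel(n_jobs=2, backend="loky")(
        jl.delayed(_pin_numba_and_run)(heavy_kernel, chunk) for chunk in chunks
    )
    print(len(results))  # 4 chunks, each processed with numba pinned to 1 thread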

4 files changed, +120 -5 lines changed


pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -88,6 +88,7 @@ test = [
     "pytest-mock>=3.5.0",
     "pytest-cov>=4",
     "coverage[toml]>=7",
+    "psutil",
 ]
 
 docs = [
     "ipython",
@@ -231,3 +232,4 @@ ban-relative-imports = "all"
 filterwarnings = [
     "error::numba.NumbaPerformanceWarning"
 ]
+addopts = "-n auto"

src/squidpy/_docs.py

Lines changed: 2 additions & 2 deletions
@@ -107,8 +107,8 @@ def decorator2(obj: Any) -> Any:
 """
 _parallelize = """\
 n_jobs
-    Number of parallel jobs to use. If the function uses numba compiled functions, numba may
-    use cores depending on the number of threads set in the environment regardless of this argument.
+    Number of parallel jobs to use. The number of cores used by numba will be set to 1 regardless of this argument
+    since the backend will create a new process or thread for each job.
 backend
     Parallelization backend to use. See :class:`joblib.Parallel` for available options.
 show_progress_bar
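
A hedged usage sketch of how the shared parameters documented by this fragment are passed in practice. It assumes sq.gr.nhood_enrichment (one of the functions that interpolates this docstring) accepts n_jobs, backend and show_progress_bar, and that the example IMC dataset carries a "cell type" annotation; both are assumptions for illustration, not guaranteed by this diff.

import squidpy as sq

# Hypothetical call exercising the documented parallelization parameters.
adata = sq.datasets.imc()
sq.gr.spatial_neighbors(adata)
sq.gr.nhood_enrichment(
    adata,
    cluster_key="cell type",
    n_jobs=4,               # joblib workers; numba inside each worker is now pinned to 1 thread
    backend="loky",         # see joblib.Parallel for available backends
    show_progress_bar=False,
)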

src/squidpy/_utils.py

Lines changed: 12 additions & 3 deletions
@@ -14,6 +14,7 @@
 from typing import TYPE_CHECKING, Any
 
 import joblib as jl
+import numba
 import numpy as np
 
 __all__ = ["singledispatchmethod", "Signal", "SigQueue", "NDArray", "NDArrayA"]
@@ -55,6 +56,11 @@ def _unique_order_preserving(
     return [i for i in iterable if not (i in seen or seen_add(i))], seen
 
 
+def _callback_wrapper(chosen_runner: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
+    numba.set_num_threads(1)
+    return chosen_runner(*args, **kwargs)
+
+
 class Signal(Enum):
     """Signaling values when informing parallelizer."""
 
@@ -163,18 +169,21 @@ def update(pbar: tqdm.std.tqdm, queue: SigQueue, n_total: int) -> None:
         if pbar is not None:
             pbar.close()
 
+    chosen_runner = runner if use_runner else callback
+
     def wrapper(*args: Any, **kwargs: Any) -> Any:
+        numba.set_num_threads(1)
         if pass_queue and show_progress_bar:
             pbar = None if tqdm is None else tqdm(total=col_len, unit=unit)
             queue = Manager().Queue()
-            thread = Thread(target=update, args=(pbar, queue, len(collections)))
+            thread = Thread(target=update, args=(pbar, queue, len(collections)), name="ParallelizeUpdateThread")
             thread.start()
         else:
             pbar, queue, thread = None, None, None
 
         res = jl.Parallel(n_jobs=n_jobs, backend=backend)(
-            jl.delayed(runner if use_runner else callback)(
-                *((i, cs) if use_ixs else (cs,)),
+            jl.delayed(_callback_wrapper)(
+                *((chosen_runner, i, cs) if use_ixs else (chosen_runner, cs)),
                 *args,
                 **kwargs,
                 queue=queue,
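
A hedged usage sketch of the updated parallelize helper, modeled on the new test added below; the runner function here is illustrative, not squidpy API. Each joblib job now goes through _callback_wrapper, so numba inside every worker runs single-threaded while joblib controls how many workers exist.

import numpy as np

from squidpy._utils import Signal, parallelize


def runner(chunk, offset, queue=None):
    # Receives one split of the collection plus any extra positional args;
    # `queue` carries progress signals and is None when the progress bar is disabled.
    out = [x * 2 + offset for x in chunk]
    if queue is not None:
        queue.put(Signal.FINISH)
    return out


arrays = [np.arange(4) for _ in range(8)]
run = parallelize(runner, arrays, n_jobs=2, n_split=2, backend="loky", show_progress_bar=False)
results = run(np.arange(4))  # list with one entry per split, each processed in its own job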

tests/utils/test_parallelize.py

Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@
+"""Tests for verifying process/thread usage in parallelized functions."""
+
+from __future__ import annotations
+
+import time
+from collections.abc import Callable
+from functools import partial
+
+import dask.array as da
+import numba
+import numpy as np
+import psutil
+import pytest  # type: ignore[import]
+
+from squidpy._utils import Signal, parallelize
+
+# Functions to be parallelized
+
+
+@numba.njit(parallel=True)
+def numba_parallel_func(x, y) -> np.ndarray:
+    return x * 2 + y
+
+
+@numba.njit(parallel=False)
+def numba_serial_func(x, y) -> np.ndarray:
+    return x * 2 + y
+
+
+def dask_func(x, y) -> np.ndarray:
+    return (da.from_array(x) * 2 + y).compute()
+
+
+def vanilla_func(x, y) -> np.ndarray:
+    return x * 2 + y
+
+
+# Mock runner function
+
+
+def mock_runner(x, y, queue, func):
+    for i in range(len(x)):
+        x[i] = func(x[i], y)
+        if queue is not None:
+            queue.put(Signal.UPDATE)
+    if queue is not None:
+        queue.put(Signal.FINISH)
+    return x
+
+
+@pytest.fixture(params=["numba_parallel", "numba_serial", "dask", "vanilla"])
+def func(request) -> Callable:
+    return {
+        "numba_parallel": numba_parallel_func,
+        "numba_serial": numba_serial_func,
+        "dask": dask_func,
+        "vanilla": vanilla_func,
+    }[request.param]
+
+
+@pytest.mark.timeout(60)
+@pytest.mark.parametrize("n_jobs", [1, 2, 8])
+def test_parallelize_loky(func, n_jobs):
+    start_time = time.time()
+    seed = 42
+    rng = np.random.RandomState(seed)
+    n = 8
+    arr1 = [rng.randint(0, 100, n) for _ in range(n)]
+    arr2 = np.arange(n)
+    runner = partial(mock_runner, func=func)
+    # this is the expected result of the function
+    expected = [func(arr1[i], arr2) for i in range(len(arr1))]
+    # this will be set to something other than 1,2,8
+    # we want to check if setting the threads works
+    # then after the function is run if the numba cores are set back to 1
+    old_num_threads = 3
+    numba.set_num_threads(old_num_threads)
+    # Get initial state
+    initial_process = psutil.Process()
+    initial_children = {p.pid for p in initial_process.children(recursive=True)}
+    initial_children = {psutil.Process(pid) for pid in initial_children}
+    init_numba_threads = numba.get_num_threads()
+
+    p_func = parallelize(runner, arr1, n_jobs=n_jobs, backend="loky", use_ixs=False, n_split=1)
+    result = p_func(arr2)[0]
+
+    final_children = {p.pid for p in initial_process.children(recursive=True)}
+    final_numba_threads = numba.get_num_threads()
+
+    assert init_numba_threads == old_num_threads, "Numba threads should not change"
+    assert final_numba_threads == 1, "Numba threads should be 1"
+    assert len(result) == len(expected), f"Expected: {expected} but got {result}. Length mismatch"
+    for i in range(len(arr1)):
+        assert np.all(result[i] == expected[i]), f"Expected {expected[i]} but got {result[i]}"
+
+    processes = final_children - initial_children
+
+    processes = {psutil.Process(pid) for pid in processes}
+    processes = {p for p in processes if not any("resource_tracker" in cl for cl in p.cmdline())}
+    if n_jobs > 1:  # expect exactly n_jobs
+        assert len(processes) == n_jobs, f"Unexpected processes created or not created: {processes}"
+    else:  # some functions use the main process others use a new process
+        processes = {p for p in processes if p.create_time() > start_time}
+        assert len(processes) <= 1, f"Unexpected processes created or not created: {processes}"
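
For readers unfamiliar with the psutil bookkeeping above, a standalone sketch of the same idiom: snapshot the current process's children before and after a joblib call, then discard the multiprocessing resource tracker before counting new loky workers. do_work is a placeholder, not part of squidpy.

import time

import joblib as jl
import psutil


def do_work(x):
    return x * 2


if __name__ == "__main__":
    start = time.time()
    me = psutil.Process()
    before = {p.pid for p in me.children(recursive=True)}

    jl.Parallel(n_jobs=2, backend="loky")(jl.delayed(do_work)(i) for i in range(4))

    # loky keeps its workers alive after the call, so they are still visible here.
    new_pids = {p.pid for p in me.children(recursive=True)} - before
    workers = {
        p
        for p in map(psutil.Process, new_pids)
        if not any("resource_tracker" in part for part in p.cmdline()) and p.create_time() > start
    }
    print(f"loky started {len(workers)} worker process(es)")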
