Fix the problems with the latest merge (#1009)

selmanozleyen · pre-commit-ci[bot] · web-flow · commit 349b75afbd26 · 2025-06-16T14:22:31.000+02:00
* save working progress * add addopts from yml * prettify test file * don't set the original threads as the wrapper function already does that * make sure tox passes the PYTEST_ADDOPTS env * set inner_max_num_threads=1 * set inner_max_num_threads only for loky * fix blunder * ensure all function are called with n_job processes * remove inner_max_num_threads to simplify stuff * clarify numba behaviour in the docs * increase timeout limit * increase the timeout again * just set inner_max_num_threads instead * clarify in the docs that oversubscription is only handled for loky backend * simplify and clean the parallelize test * fix doc formatting * add non loky alternative to test also the other case * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * undo multiprocessing test option bc it may be too slow for the ci's. (at least they pass) * increase timeout limit * or instead reduce the computation required * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add other cases but skip in CI also reduce computation required again * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * refactor skip markings --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -88,6 +88,7 @@ jobs:
                   MPLBACKEND: agg
                   PLATFORM: ${{ matrix.os }}
                   DISPLAY: :42
+                  PYTEST_ADDOPTS: "-n auto"
               run: |
                   tox -vv
             # check if this can be deprecated
diff --git a/pyproject.toml b/pyproject.toml
@@ -88,7 +88,7 @@ test = [
     "pytest-mock>=3.5.0",
     "pytest-cov>=4",
     "coverage[toml]>=7",
-    "psutil",
+    "pytest-timeout>=2.1.0",
 ]
 docs = [
     "ipython",
@@ -231,5 +231,4 @@ ban-relative-imports = "all"
 [tool.pytest.ini_options]
 filterwarnings = [
     "error::numba.NumbaPerformanceWarning"
-]
-addopts = "-n auto"
+]
diff --git a/src/squidpy/_docs.py b/src/squidpy/_docs.py
@@ -107,8 +107,14 @@ def decorator2(obj: Any) -> Any:
 """
 _parallelize = """\
 n_jobs
-    Number of parallel jobs to use. The number of cores used by numba will be set to 1 regardless of this argument
-    since the backend will create a new process or thread for each job.
+    Number of parallel jobs to use.
+    For ``backend="loky"``, the number of cores used by numba for
+    each job spawned by the backend will be set to 1 in order to
+    overcome the oversubscription issue in case you run
+    numba in your function to parallelize.
+    To set the absolute maximum number of threads in numba
+    for your python program, set the environment variable:
+    ``NUMBA_NUM_THREADS`` before running the program.
 backend
     Parallelization backend to use. See :class:`joblib.Parallel` for available options.
 show_progress_bar
diff --git a/src/squidpy/_utils.py b/src/squidpy/_utils.py
@@ -172,29 +172,29 @@ def update(pbar: tqdm.std.tqdm, queue: SigQueue, n_total: int) -> None:
     chosen_runner = runner if use_runner else callback
 
     def wrapper(*args: Any, **kwargs: Any) -> Any:
-        numba.set_num_threads(1)
         if pass_queue and show_progress_bar:
             pbar = None if tqdm is None else tqdm(total=col_len, unit=unit)
             queue = Manager().Queue()
             thread = Thread(target=update, args=(pbar, queue, len(collections)), name="ParallelizeUpdateThread")
             thread.start()
         else:
             pbar, queue, thread = None, None, None
-
-        res = jl.Parallel(n_jobs=n_jobs, backend=backend)(
-            jl.delayed(_callback_wrapper)(
-                *((chosen_runner, i, cs) if use_ixs else (chosen_runner, cs)),
-                *args,
-                **kwargs,
-                queue=queue,
+        jl_kwargs = {"inner_max_num_threads": 1} if backend == "loky" else {}
+        with jl.parallel_config(backend, n_jobs=n_jobs, **jl_kwargs):
+            res = jl.Parallel(n_jobs=n_jobs, backend=backend)(
+                jl.delayed(_callback_wrapper)(
+                    *((chosen_runner, i, cs) if use_ixs else (chosen_runner, cs)),
+                    *args,
+                    **kwargs,
+                    queue=queue,
+                )
+                for i, cs in enumerate(collections)
             )
-            for i, cs in enumerate(collections)
-        )
 
-        if thread is not None:
-            thread.join()
+            if thread is not None:
+                thread.join()
 
-        return res if extractor is None else extractor(res)
+            return res if extractor is None else extractor(res)
 
     if n_jobs is None:
         n_jobs = 1
diff --git a/tests/utils/test_parallelize.py b/tests/utils/test_parallelize.py
@@ -2,21 +2,26 @@
 
 from __future__ import annotations
 
-import time
+import os
 from collections.abc import Callable
 from functools import partial
 
 import dask.array as da
 import numba
 import numpy as np
-import psutil
 import pytest  # type: ignore[import]
 
 from squidpy._utils import Signal, parallelize
 
 # Functions to be parallelized
 
 
+def wrap_numba_check(x, y, inner_function, check_threads=True):
+    if check_threads:
+        assert numba.get_num_threads() == 1
+    return inner_function(x, y)
+
+
 @numba.njit(parallel=True)
 def numba_parallel_func(x, y) -> np.ndarray:
     return x * 2 + y
@@ -38,9 +43,9 @@ def vanilla_func(x, y) -> np.ndarray:
 # Mock runner function
 
 
-def mock_runner(x, y, queue, func):
-    for i in range(len(x)):
-        x[i] = func(x[i], y)
+def mock_runner(x, y, queue, function):
+    for i, xi in enumerate(x):
+        x[i] = function(xi, y, check_threads=True)
         if queue is not None:
             queue.put(Signal.UPDATE)
     if queue is not None:
@@ -51,54 +56,52 @@ def mock_runner(x, y, queue, func):
 @pytest.fixture(params=["numba_parallel", "numba_serial", "dask", "vanilla"])
 def func(request) -> Callable:
     return {
-        "numba_parallel": numba_parallel_func,
-        "numba_serial": numba_serial_func,
-        "dask": dask_func,
-        "vanilla": vanilla_func,
+        "numba_parallel": partial(wrap_numba_check, inner_function=numba_parallel_func),
+        "numba_serial": partial(wrap_numba_check, inner_function=numba_serial_func),
+        "dask": partial(wrap_numba_check, inner_function=dask_func),
+        "vanilla": partial(wrap_numba_check, inner_function=vanilla_func),
     }[request.param]
 
 
-@pytest.mark.timeout(60)
-@pytest.mark.parametrize("n_jobs", [1, 2, 8])
-def test_parallelize_loky(func, n_jobs):
-    start_time = time.time()
+# Timeouts are also useful because some processes don't return
+# in case of failure.
+
+
+@pytest.mark.timeout(30)
+@pytest.mark.parametrize(
+    "backend",
+    [
+        pytest.param(
+            "threading",
+            marks=pytest.mark.skipif(
+                os.environ.get("CI") == "true", reason="Only testing 'loky' backend in CI environment"
+            ),
+        ),
+        pytest.param(
+            "multiprocessing",
+            marks=pytest.mark.skipif(
+                os.environ.get("CI") == "true", reason="Only testing 'loky' backend in CI environment"
+            ),
+        ),
+        "loky",
+    ],
+)
+def test_parallelize(func, backend):
     seed = 42
+    n = 2
+    n_jobs = 2
     rng = np.random.RandomState(seed)
-    n = 8
     arr1 = [rng.randint(0, 100, n) for _ in range(n)]
     arr2 = np.arange(n)
-    runner = partial(mock_runner, func=func)
-    # this is the expected result of the function
-    expected = [func(arr1[i], arr2) for i in range(len(arr1))]
-    # this will be set to something other than 1,2,8
-    # we want to check if setting the threads works
-    # then after the function is run if the numba cores are set back to 1
-    old_num_threads = 3
-    numba.set_num_threads(old_num_threads)
-    # Get initial state
-    initial_process = psutil.Process()
-    initial_children = {p.pid for p in initial_process.children(recursive=True)}
-    initial_children = {psutil.Process(pid) for pid in initial_children}
-    init_numba_threads = numba.get_num_threads()
-
-    p_func = parallelize(runner, arr1, n_jobs=n_jobs, backend="loky", use_ixs=False, n_split=1)
-    result = p_func(arr2)[0]
-
-    final_children = {p.pid for p in initial_process.children(recursive=True)}
-    final_numba_threads = numba.get_num_threads()
-
-    assert init_numba_threads == old_num_threads, "Numba threads should not change"
-    assert final_numba_threads == 1, "Numba threads should be 1"
-    assert len(result) == len(expected), f"Expected: {expected} but got {result}. Length mismatch"
-    for i in range(len(arr1)):
-        assert np.all(result[i] == expected[i]), f"Expected {expected[i]} but got {result[i]}"
-
-    processes = final_children - initial_children
-
-    processes = {psutil.Process(pid) for pid in processes}
-    processes = {p for p in processes if not any("resource_tracker" in cl for cl in p.cmdline())}
-    if n_jobs > 1:  # expect exactly n_jobs
-        assert len(processes) == n_jobs, f"Unexpected processes created or not created: {processes}"
-    else:  # some functions use the main process others use a new process
-        processes = {p for p in processes if p.create_time() > start_time}
-        assert len(processes) <= 1, f"Unexpected processes created or not created: {processes}"
+    runner = partial(mock_runner, function=func)
+
+    init_threads = numba.get_num_threads()
+    expected = np.vstack([func(a1, arr2, check_threads=False) for a1 in arr1])
+
+    p_func = parallelize(
+        runner, arr1, n_jobs=n_jobs, backend=backend, use_ixs=False, extractor=np.vstack, show_progress=False
+    )
+    result = p_func(arr2)
+
+    assert numba.get_num_threads() == init_threads, "Number of threads should stay the same after parallelization"
+    assert np.allclose(result, expected), f"Expected: {expected} but got {result}"
diff --git a/tox.ini b/tox.ini
@@ -80,7 +80,7 @@ extras =
     interactive
     test
 setenv = linux: PYTEST_FLAGS=--test-napari
-passenv = TOXENV,CI,CODECOV_*,GITHUB_ACTIONS,PYTEST_FLAGS,DISPLAY,XAUTHORITY,MPLBACKEND
+passenv = TOXENV,CI,CODECOV_*,GITHUB_ACTIONS,PYTEST_FLAGS,DISPLAY,XAUTHORITY,MPLBACKEND,PYTEST_ADDOPTS
 usedevelop = true
 commands =
     python -m pytest --color=yes --cov --cov-append --cov-report=xml --cov-config={toxinidir}/tox.ini --ignore docs/ {posargs:-vv} {env:PYTEST_FLAGS:}