Skip to content

Commit c19307c

Browse files
committed
(feat): tracking peak memory, clean up args
1 parent 3eb9d9a commit c19307c

File tree

6 files changed

+152
-71
lines changed

6 files changed

+152
-71
lines changed

.gitignore

+4-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
__pycache__/
44
/*cache/
55
.ipynb_checkpoints/
6-
/data/
6+
data/
77

88
# Distribution / packaging
99
/dist/
@@ -16,3 +16,6 @@ __pycache__/
1616

1717
# Venvs
1818
*venv/
19+
20+
# asv
21+
.asv/

benchmarks/benchmarks/preprocessing.py

+34-20
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
import rapids_singlecell as rsc
1111

12+
from .utils import track_peakmem
13+
1214

1315
class PreprocessingSuite:
1416
_data_dict = dict(pbmc68k_reduced=sc.datasets.pbmc68k_reduced())
@@ -21,69 +23,81 @@ def setup(self, input_data: str):
2123
def time_calculate_qc_metrics(self, *_):
2224
self.adata.var["mt"] = self.adata.var_names.str.startswith("MT-")
2325
rsc.pp.calculate_qc_metrics(
24-
self.adata, qc_vars=["mt"], percent_top=None, log1p=False, inplace=True
26+
self.adata, qc_vars=["mt"], log1p=False
2527
)
2628

27-
def peakmem_calculate_qc_metrics(self, *_):
29+
@track_peakmem
30+
def track_peakmem_calculate_qc_metrics(self, *_):
2831
self.adata.var["mt"] = self.adata.var_names.str.startswith("MT-")
2932
rsc.pp.calculate_qc_metrics(
30-
self.adata, qc_vars=["mt"], percent_top=None, log1p=False, inplace=True
33+
self.adata, qc_vars=["mt"], log1p=False
3134
)
3235

3336
def time_filter_cells(self, *_):
34-
rsc.pp.filter_cells(self.adata, min_genes=200)
37+
rsc.pp.filter_cells(self.adata, qc_var="n_counts", min_count=200)
38+
39+
@track_peakmem
40+
def track_peakmem_filter_cells(self, *_):
41+
rsc.pp.filter_cells(self.adata, qc_var="n_counts", min_count=200)
3542

36-
def peakmem_filter_cells(self, *_):
37-
rsc.pp.filter_cells(self.adata, min_genes=200)
3843

3944
def time_filter_genes(self, *_):
40-
rsc.pp.filter_genes(self.adata, min_cells=3)
45+
rsc.pp.filter_genes(self.adata, qc_var="n_counts", min_count=3)
4146

42-
def peakmem_filter_genes(self, *_):
43-
rsc.pp.filter_genes(self.adata, min_cells=3)
47+
@track_peakmem
48+
def track_peakmem_filter_genes(self, *_):
49+
rsc.pp.filter_genes(self.adata, qc_var="n_counts", min_count=3)
4450

4551
def time_normalize_total(self, *_):
4652
rsc.pp.normalize_total(self.adata, target_sum=1e4)
4753

48-
def peakmem_normalize_total(self, *_):
54+
@track_peakmem
55+
def track_peakmem_normalize_total(self, *_):
4956
rsc.pp.normalize_total(self.adata, target_sum=1e4)
5057

5158
def time_log1p(self, *_):
5259
rsc.pp.log1p(self.adata)
5360

54-
def peakmem_time_log1p(self, *_):
61+
@track_peakmem
62+
def track_peakmem_time_log1p(self, *_):
5563
rsc.pp.log1p(self.adata)
5664

5765
def time_pca(self, *_):
58-
rsc.pp.pca(self.adata, svd_solver="arpack")
66+
rsc.pp.pca(self.adata)
5967

60-
def peakmem_pca(self, *_):
61-
rsc.pp.pca(self.adata, svd_solver="arpack")
68+
@track_peakmem
69+
def track_peakmem_pca(self, *_):
70+
rsc.pp.pca(self.adata)
6271

6372
def time_highly_variable_genes(self, *_):
6473
rsc.pp.highly_variable_genes(
6574
self.adata, min_mean=0.0125, max_mean=3, min_disp=0.5
6675
)
6776

68-
def peakmem_highly_variable_genes(self, *_):
77+
@track_peakmem
78+
def track_peakmem_highly_variable_genes(self, *_):
6979
rsc.pp.highly_variable_genes(
7080
self.adata, min_mean=0.0125, max_mean=3, min_disp=0.5
7181
)
7282

7383
def time_regress_out(self, *_):
7484
rsc.pp.regress_out(self.adata, ["n_counts", "percent_mito"])
7585

76-
def peakmem_regress_out(self, *_):
86+
@track_peakmem
87+
def track_peakmem_regress_out(self, *_):
7788
rsc.pp.regress_out(self.adata, ["n_counts", "percent_mito"])
7889

7990
def time_scale(self, *_):
8091
rsc.pp.scale(self.adata, max_value=10)
8192

82-
def peakmem_scale(self, *_):
93+
@track_peakmem
94+
def track_peakmem_scale(self, *_):
8395
rsc.pp.scale(self.adata, max_value=10)
8496

8597
def time_neighbors(self, *_):
86-
rsc.pp.neighbors(self.adata, n_neighbors=15, n_pcs=100)
98+
rsc.pp.neighbors(self.adata, n_neighbors=15, n_pcs=50)
99+
100+
@track_peakmem
101+
def track_peakmem_neighbors(self, *_):
102+
rsc.pp.neighbors(self.adata, n_neighbors=15, n_pcs=50)
87103

88-
def peakmem_neighbors(self, *_):
89-
rsc.pp.neighbors(self.adata, n_neighbors=15, n_pcs=100)

benchmarks/benchmarks/readwrite.py

+7-27
Original file line numberDiff line numberDiff line change
@@ -22,43 +22,23 @@
2222

2323
from __future__ import annotations
2424

25-
from dataclasses import dataclass
26-
from typing import TYPE_CHECKING
27-
28-
import anndata
2925
import scanpy as sc
3026

3127
from rapids_singlecell.get import anndata_to_GPU
32-
33-
if TYPE_CHECKING:
34-
from collections.abc import Callable
35-
from pathlib import Path
36-
37-
import pathlib
38-
39-
sc.settings.datasetdir = pathlib.Path(__file__).parent.resolve() / "data"
40-
41-
@dataclass
42-
class Dataset:
43-
path: Path
44-
get: Callable[[], anndata.AnnData]
45-
46-
path="/p/project/training2406/team_scverse/gold2/rapids_singlecell/benchmarks/data/pbmc3k_raw.h5ad"
47-
28+
from .utils import track_peakmem
4829

4930
class ToGPUSuite:
50-
_data_dict = dict(pbmc3k=anndata.read_h5ad(path))
31+
# Keys double as the asv parameter labels (see ``params`` below), so the key
# must spell the dataset name correctly: pbmc68k_reduced, not "obmc68k_reduced".
_data_dict = dict(pbmc68k_reduced=sc.datasets.pbmc68k_reduced())
5132
params = _data_dict.keys()
5233
param_names = ["input_data"]
5334

5435
def setup(self, input_data: str):
55-
self.data = self._data_dict[input_data]
36+
self.adata = self._data_dict[input_data]
5637

5738
def time_to_gpu(self, *_):
58-
anndata_to_GPU(self.data)
39+
anndata_to_GPU(self.adata)
5940

60-
def peakmem_to_gpu(self, *_):
61-
anndata_to_GPU(self.data)
41+
@track_peakmem
42+
def track_peakmem_to_gpu(self, *_):
43+
anndata_to_GPU(self.adata)
6244

63-
def mem_to_gpu(self, *_):
64-
anndata_to_GPU(self.data)

benchmarks/benchmarks/squidpy.py

+23-17
Original file line numberDiff line numberDiff line change
@@ -7,52 +7,58 @@
77

88
from itertools import product
99

10-
import anndata as ad
10+
import scanpy as sc
1111

1212
import rapids_singlecell as rsc
1313

14-
import pathlib
14+
from .utils import track_peakmem
1515

1616
class ToolsSuite:
1717
_data_dict = dict(
18-
visium_sge=ad.read_h5ad("/p/project/training2406/team_scverse/gold2/rapids_singlecell/benchmarks/data/paul15.h5ad"),
18+
pbmc68k_reduced=sc.datasets.pbmc68k_reduced(),
1919
)
2020
params = _data_dict.keys()
2121
param_names = ["input_data"]
2222

2323
def setup(self, input_data):
24-
self.adata = rsc.get.anndata_to_GPU(self._data_dict[input_data].copy(), copy=True)
24+
self.cpu_adata = self._data_dict[input_data].copy()
25+
self.gpu_adata = rsc.get.anndata_to_GPU(self.cpu_adata, copy=True)
2526

2627
def time_ligrec(self, *_):
27-
gene_ids = self.adata.var.index
28+
gene_ids = self.cpu_adata.var.index
2829
interactions = tuple(product(gene_ids[:5], gene_ids[:5]))
2930
rsc.gr.ligrec(
30-
self.adata,
31-
"leiden",
31+
self.cpu_adata,
32+
"louvain",
3233
interactions=interactions,
3334
n_perms=5,
3435
use_raw=False,
3536
)
3637

37-
def peakmem_ligrec(self, *_):
38-
gene_ids = self.adata.var.index
38+
@track_peakmem
39+
def track_peakmem_ligrec(self, *_):
40+
gene_ids = self.cpu_adata.var.index
3941
interactions = tuple(product(gene_ids[:5], gene_ids[:5]))
4042
rsc.gr.ligrec(
41-
self.adata,
42-
"leiden",
43+
self.cpu_adata,
44+
"louvain",
4345
interactions=interactions,
4446
n_perms=5,
4547
use_raw=False,
4648
)
4749

50+
4851
def time_autocorr_moran(self, *_):
49-
rsc.gr.spatial_autocorr(self.adata, mode="moran")
52+
rsc.gr.spatial_autocorr(self.gpu_adata, mode="moran", connectivity_key="connectivities")
5053

51-
def peakmem_autocorr_moran(self, *_):
52-
rsc.gr.spatial_autocorr(self.adata, mode="moran")
54+
@track_peakmem
55+
def track_peakmem_autocorr_moran(self, *_):
56+
rsc.gr.spatial_autocorr(self.gpu_adata, mode="moran", connectivity_key="connectivities")
5357

5458
def time_autocorr_geary(self, *_):
55-
rsc.gr.spatial_autocorr(self.adata, mode="geary")
59+
rsc.gr.spatial_autocorr(self.gpu_adata, mode="geary", connectivity_key="connectivities")
60+
61+
@track_peakmem
62+
def track_peakmem_autocorr_geary(self, *_):
63+
rsc.gr.spatial_autocorr(self.gpu_adata, mode="geary", connectivity_key="connectivities")
5664

57-
def peakmem_autocorr_geary(self, *_):
58-
rsc.gr.spatial_autocorr(self.adata, mode="geary")

benchmarks/benchmarks/tools.py

+10-6
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
import rapids_singlecell as rsc
1111

12-
import pathlib
12+
from .utils import track_peakmem
1313

1414

1515
class ToolsSuite:
@@ -21,28 +21,32 @@ class ToolsSuite:
2121

2222
def setup(self, input_data):
2323
self.adata = rsc.get.anndata_to_GPU(self._data_dict[input_data].copy(), copy=True)
24-
assert "X_pca" in self.adata.obsm
2524

2625
def time_umap(self, *_):
2726
rsc.tl.umap(self.adata)
2827

29-
def peakmem_umap(self, *_):
28+
@track_peakmem
29+
def track_peakmem_umap(self, *_):
3030
rsc.tl.umap(self.adata)
3131

3232
def time_diffmap(self, *_):
3333
rsc.tl.diffmap(self.adata)
3434

35-
def peakmem_diffmap(self, *_):
35+
@track_peakmem
36+
def track_peakmem_diffmap(self, *_):
3637
rsc.tl.diffmap(self.adata)
3738

3839
def time_leiden(self, *_):
3940
rsc.tl.leiden(self.adata)
4041

41-
def peakmem_leiden(self, *_):
42+
@track_peakmem
43+
def track_peakmem_leiden(self, *_):
4244
rsc.tl.leiden(self.adata)
4345

4446
def time_embedding_denity(self, *_):
4547
rsc.tl.embedding_density(self.adata, basis="umap")
4648

47-
def peakmem_embedding_denity(self, *_):
49+
@track_peakmem
50+
def track_peakmem_embedding_denity(self, *_):
4851
rsc.tl.embedding_density(self.adata, basis="umap")
52+

benchmarks/benchmarks/utils.py

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# From https://github.com/rapidsai/benchmark/blob/570531ba4bc90c508245e943d2aaa11d68a24286/rapids_pytest_benchmark/rapids_pytest_benchmark/rmm_resource_analyzer.py#L29
2+
3+
import os
4+
import csv
5+
import rmm
6+
import tempfile
7+
8+
9+
class RMMResourceAnalyzer:
    """
    Control enabling, disabling, and parsing of RMM resource logs.

    After ``disable_logging`` has run, the results are available as:

    * ``max_gpu_mem_usage`` -- highest running total of allocated minus
      freed sizes seen while replaying the log(s).
    * ``leaked_memory`` -- running total left over once every log row has
      been replayed (allocated but never freed).
    * ``max_gpu_util`` -- initialised to ``-1`` and not updated by this class.
    """

    def __init__(self, benchmark_name):
        """
        Prepare an analyzer whose log files live in the system temp directory.

        Parameters
        ----------
        benchmark_name
            Used as the base file name of the RMM log file prefix.
        """
        self.max_gpu_util = -1
        self.max_gpu_mem_usage = 0
        self.leaked_memory = 0
        self._log_file_prefix = os.path.join(tempfile.gettempdir(), benchmark_name)

    def enable_logging(self):
        """
        Enable RMM logging. RMM creates a CSV output file derived from
        provided file name that looks like: log_file_prefix + ".devX", where
        X is the GPU number.
        """
        rmm.enable_logging(log_file_name=self._log_file_prefix)

    def disable_logging(self):
        """
        Disable RMM logging, parse the produced log files, then delete them.

        The log files are removed even when parsing fails, so a malformed log
        does not leave stale files behind in the temp directory.
        """
        # Grab the file names before logging is switched off.
        log_output_files = rmm.get_log_filenames()
        rmm.mr._flush_logs()
        rmm.disable_logging()
        # FIXME: potential improvement here would be to only parse the log files for
        # the gpu ID that's passed in via --benchmark-gpu-device
        try:
            self._parse_results(log_output_files)
        finally:
            for log_file in log_output_files.values():
                try:
                    os.remove(log_file)
                except FileNotFoundError:
                    # Nothing to clean up for this entry; do not mask an
                    # error raised while parsing.
                    pass

    def _parse_results(self, log_files):
        """
        Parse CSV results. CSV file has columns:
        Thread,Time,Action,Pointer,Size,Stream

        Parameters
        ----------
        log_files
            Mapping whose values are paths of the CSV log files to replay.
            The running total is carried across all files in iteration order.
        """
        current_mem_usage = 0
        for log_file in log_files.values():
            # newline="" is what the csv module expects for files it reads.
            with open(log_file, mode="r", newline="") as csv_file:
                for row in csv.DictReader(csv_file):
                    row_action = row["Action"]
                    row_size = int(row["Size"])

                    if row_action == "allocate":
                        current_mem_usage += row_size
                        if current_mem_usage > self.max_gpu_mem_usage:
                            self.max_gpu_mem_usage = current_mem_usage
                    elif row_action == "free":
                        current_mem_usage -= row_size
                    # Any other action leaves the running total unchanged.
        # Whatever is still outstanding after the last row was never freed.
        self.leaked_memory = current_mem_usage
64+
65+
def track_peakmem(fn):
    """
    Decorate a benchmark method so it reports peak RMM memory usage.

    The wrapped method is run with RMM logging enabled; its own return value
    is discarded and the wrapper returns ``max_gpu_mem_usage`` as computed by
    ``RMMResourceAnalyzer`` from the RMM log instead.

    Parameters
    ----------
    fn
        Benchmark method taking ``self`` plus arbitrary arguments. Its
        ``__name__`` is used as the RMM log file name.

    Returns
    -------
    The wrapping function, carrying ``fn``'s metadata via ``functools.wraps``.
    """
    from functools import wraps

    @wraps(fn)
    def wrapper(self, *args, **kwargs):
        resource_analyzer = RMMResourceAnalyzer(benchmark_name=fn.__name__)
        resource_analyzer.enable_logging()
        try:
            fn(self, *args, **kwargs)
        finally:
            # Always switch logging back off and clean up the log files, even
            # when the benchmark raises, so later benchmarks in the same
            # process do not start with RMM logging still enabled.
            resource_analyzer.disable_logging()
        return resource_analyzer.max_gpu_mem_usage

    return wrapper

0 commit comments

Comments
 (0)