Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
7d2c62b
add precommit
Intron7 Sep 11, 2025
7bc5367
add first implementation
Intron7 Sep 11, 2025
dc76b24
clang format
Intron7 Sep 11, 2025
dc3648b
format
Intron7 Sep 11, 2025
ef8a756
format c++
Intron7 Sep 11, 2025
76ae7aa
Merge branch 'main' into move-to-nanobind
Intron7 Sep 11, 2025
7ed481d
change clang-format
Intron7 Sep 11, 2025
48ba592
fix version
Intron7 Sep 11, 2025
1d2f12a
test docs
Intron7 Sep 11, 2025
60c4863
fix yml
Intron7 Sep 11, 2025
9a2b113
fix sparse to dense kernel launch
Intron7 Sep 11, 2025
2d5ea85
test read the docs
Intron7 Sep 11, 2025
9878e5d
try env
Intron7 Sep 11, 2025
6b46e8a
test cmakeargs
Intron7 Sep 11, 2025
55027f7
add system back
Intron7 Sep 11, 2025
b780405
add failsafe
Intron7 Sep 11, 2025
24104ff
remove print and slim down toml
Intron7 Sep 12, 2025
dddd9e8
Add almost unchanged cibw
flying-sheep Sep 15, 2025
5981d50
No macOS
flying-sheep Sep 15, 2025
b3c3853
test build wheels
Intron7 Sep 15, 2025
b24bf7b
next
Intron7 Sep 15, 2025
56aca24
remove wheels workflow
Intron7 Sep 15, 2025
7068b19
remove windows
Intron7 Sep 15, 2025
9a58ff0
remove optional parts
flying-sheep Sep 15, 2025
7f65657
test publish
Intron7 Sep 15, 2025
56f837a
3.12
flying-sheep Sep 15, 2025
3a9a9f1
fix path
Intron7 Sep 15, 2025
474de68
remove bad/useless
flying-sheep Sep 15, 2025
646ba23
fix container
Intron7 Sep 15, 2025
ae57cb1
try CUDA_PATH
flying-sheep Sep 15, 2025
33ac5af
skip musl again
flying-sheep Sep 15, 2025
a529a58
add next kernels
Intron7 Sep 15, 2025
0685436
add pca and make safe docs
Intron7 Sep 16, 2025
24551bd
Merge branch 'main' into move-to-nanobind
Intron7 Sep 16, 2025
5d327bd
make aggr safe
Intron7 Sep 16, 2025
30414ab
add harmony
Intron7 Sep 16, 2025
d46ab83
make qc smaller
Intron7 Sep 16, 2025
d45d6bf
add ligrec
Intron7 Sep 16, 2025
20cf11e
move decoupler
Intron7 Sep 16, 2025
134d2e0
remove rawkernels
Intron7 Sep 16, 2025
a872962
add release note
Intron7 Sep 16, 2025
2825de7
fix shape qc
Intron7 Sep 16, 2025
66e930f
fix entropy
Intron7 Sep 16, 2025
d386000
fix version
Intron7 Sep 16, 2025
cfdec19
add streams
Intron7 Sep 17, 2025
4876400
Merge branch 'main' into move-to-nanobind
Intron7 Sep 17, 2025
3a20dc2
fix pointer
Intron7 Sep 17, 2025
948b86a
fix test
Intron7 Sep 17, 2025
3fdde98
terse args
flying-sheep Sep 18, 2025
8abaab0
kw-only for aggr.cu
flying-sheep Sep 18, 2025
84a34c4
remaining cleanup
flying-sheep Sep 18, 2025
e53c87a
add keywords
Intron7 Sep 18, 2025
ad7ed53
fix keywords ligrec
Intron7 Sep 18, 2025
a62a596
add 120
Intron7 Sep 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# clang-format configuration for the C/C++/CUDA sources; starts from the Google style.
BasedOnStyle: Google
Language: Cpp

# Keep opening braces attached to the preceding line, and never collapse a
# function body onto a single line.
BreakBeforeBraces: Attach
AllowShortFunctionsOnASingleLine: None

# Bin-pack declaration parameters and call arguments: fill each line before
# wrapping instead of placing one per line.
BinPackParameters: true
BinPackArguments: true

# Layout: 2-space indent, 100-column limit, and left-aligned pointers (`T* p`)
# applied uniformly rather than derived from each file.
IndentWidth: 2
ColumnLimit: 100
PointerAlignment: Left
DerivePointerAlignment: false

# Leave #include order untouched so formatting never reorders includes.
SortIncludes: false

# Always break the line after a template declaration.
AlwaysBreakTemplateDeclarations: Yes
103 changes: 81 additions & 22 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
@@ -1,36 +1,95 @@
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
# https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
# https://github.com/pypa/cibuildwheel/blob/main/examples/github-deploy.yml

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package
name: Build and upload to PyPI

on:
workflow_dispatch:
pull_request:
push:
branches: [main]
release:
types: [published]

jobs:
deploy:
build_wheels:
name: Build wheels for ${{ matrix.os }}
runs-on: ${{ matrix.runs-on }}
strategy:
matrix:
include:
- os: linux-intel
runs-on: ubuntu-latest
cibw_image: "ghcr.io/scverse/rapids_singlecell:manylinux_2_28_x86_64_cuda12.9"
dockerfile: "docker/manylinux_2_28_x86_64_cuda12.9.Dockerfile"
- os: linux-arm
runs-on: ubuntu-24.04-arm
cibw_image: "ghcr.io/scverse/rapids_singlecell:manylinux_2_28_aarch64_cuda12.9"
dockerfile: "docker/manylinux_2_28_aarch64_cuda12.9.Dockerfile"

steps:
- uses: actions/checkout@v5

- name: Build CUDA manylinux image
run: |
docker build -t "${{ matrix.cibw_image }}" -f "${{ matrix.dockerfile }}" docker

# cibuildwheel action (Linux-only wheels inside our custom manylinux+CUDA images)
- name: Build wheels (CUDA 12.9)
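uses: pypa/cibuildwheel@v3.1.4  # NOTE(review): the ref was mangled to "[email protected]" in this capture; confirm the pinned version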
env:
# Skip musllinux
CIBW_SKIP: '*-musllinux*'
# Point cibuildwheel to our CUDA manylinux images (per-arch)
CIBW_MANYLINUX_X86_64_IMAGE: ${{ matrix.os == 'linux-intel' && matrix.cibw_image || '' }}
CIBW_MANYLINUX_AARCH64_IMAGE: ${{ matrix.os == 'linux-arm' && matrix.cibw_image || '' }}
# Make CUDA visible inside the build container
CIBW_ENVIRONMENT: >
CUDA_PATH=/usr/local/cuda
LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
PATH=/usr/local/cuda/bin:$PATH
# Tooling to build a nanobind/scikit-build-core extension
CIBW_BEFORE_BUILD: >
python -m pip install -U pip
scikit-build-core cmake ninja nanobind
# No runtime tests (CI has no GPU)
CIBW_TEST_SKIP: "*"
CIBW_TEST_COMMAND: ""
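# Bundle redistributable CUDA libs & ensure manylinux compliance.
# NOTE(review): this environment variable takes precedence over
# [tool.cibuildwheel.linux].repair-wheel-command in pyproject.toml, so the
# abi3audit step configured there does not run in this workflow.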
CIBW_REPAIR_WHEEL_COMMAND: "auditwheel repair -w {dest_dir} {wheel}"
# Be somewhat chatty to see compile/link flags
CIBW_BUILD_VERBOSITY: "1"

- uses: actions/upload-artifact@v4
with:
name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
path: ./wheelhouse/*.whl

build_sdist:
name: Build source distribution
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- name: Build sdist
run: pipx run build --sdist
- uses: actions/upload-artifact@v4
with:
name: cibw-sdist
path: dist/*.tar.gz

upload_pypi:
needs: [build_wheels, build_sdist]
runs-on: ubuntu-latest
environment: publish

permissions:
id-token: write

if: github.event_name == 'release' && github.event.action == 'published'
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.x'
- name: Install CLI tool
run: pip install build
- name: Build package
run: python -m build
- name: Publish package
uses: pypa/gh-action-pypi-publish@release/v1
- uses: actions/download-artifact@v5
with:
# unpacks all CIBW artifacts into dist/
pattern: cibw-*
path: dist
merge-multiple: true

- uses: pypa/gh-action-pypi-publish@release/v1
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,9 @@ repos:
- id: codespell
additional_dependencies:
- tomli
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v18.1.8
hooks:
- id: clang-format
args: [--style=file, -i]
types_or: [c, c++, cuda]
3 changes: 2 additions & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@ build:
os: ubuntu-24.04
tools:
python: "3.12"

commands:
# Install and set up uv
- asdf plugin add uv
- asdf install uv latest
- asdf global uv latest

# Use uv to synchronize dependencies
- uv pip install --system .[doc]
- CMAKE_ARGS="-DRSC_BUILD_EXTENSIONS=OFF" uv pip install --system ".[doc]"

# Build documentation using sphinx
- python -m sphinx -T -b html -d docs/_build/doctrees -D language=en docs $READTHEDOCS_OUTPUT/html
Expand Down
67 changes: 67 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Top-level build for the rapids_singlecell CUDA extension modules.
cmake_minimum_required(VERSION 3.24)

# Only CXX is declared here; CUDA is enabled further down, and only when the
# extensions are built, so a configure with RSC_BUILD_EXTENSIONS=OFF never
# asks CMake for a CUDA compiler.
project(rapids_singlecell_cuda LANGUAGES CXX)

# ON by default; pass -DRSC_BUILD_EXTENSIONS=OFF to skip the compiled
# extensions (used for docs/RTD builds)
option(RSC_BUILD_EXTENSIONS "Build CUDA/C++ extensions" ON)

# Require C++17 and build position-independent code.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# Toolchain and dependency discovery happens only when extensions are built.
# NOTE(review): ${SKBUILD_SABI_COMPONENT} is not set anywhere in this file;
# presumably scikit-build-core supplies it for stable-ABI builds — confirm.
if (RSC_BUILD_EXTENSIONS)
enable_language(CUDA)
find_package(Python REQUIRED COMPONENTS Interpreter Development.Module ${SKBUILD_SABI_COMPONENT})
find_package(nanobind CONFIG REQUIRED)
find_package(CUDAToolkit REQUIRED)
else()
message(STATUS "RSC_BUILD_EXTENSIONS=OFF -> skipping compiled extensions for docs")
endif()

# add_nb_cuda_module(<target> <src>)
#
# Declares one nanobind extension module named <target>, built from the single
# source file <src>. The module is created with nanobind's STABLE_ABI and LTO
# options, linked privately against CUDA::cudart, compiled with CUDA separable
# compilation, and installed into rapids_singlecell/_cuda.
#
# Does nothing when RSC_BUILD_EXTENSIONS is OFF.
function(add_nb_cuda_module target src)
  if (NOT RSC_BUILD_EXTENSIONS)
    return()
  endif()

  nanobind_add_module(${target} STABLE_ABI LTO ${src})
  target_link_libraries(${target} PRIVATE CUDA::cudart)
  set_property(TARGET ${target} PROPERTY CUDA_SEPARABLE_COMPILATION ON)

  install(TARGETS ${target} LIBRARY DESTINATION rapids_singlecell/_cuda)

  # After each build, mirror the module into the source tree so that editable
  # installs can import it from src/rapids_singlecell/_cuda.
  add_custom_command(
    TARGET ${target} POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy
            $<TARGET_FILE:${target}>
            ${PROJECT_SOURCE_DIR}/src/rapids_singlecell/_cuda/$<TARGET_FILE_NAME:${target}>
  )
endfunction()

# Register one extension module per CUDA source file. Each call passes the
# target name followed by the .cu file it is built from; add_nb_cuda_module
# installs the result under rapids_singlecell/_cuda.
# add_nb_cuda_module checks RSC_BUILD_EXTENSIONS itself, so this outer guard is
# redundant but harmless.
if (RSC_BUILD_EXTENSIONS)
# CUDA modules
add_nb_cuda_module(_mean_var_cuda src/rapids_singlecell/_cuda/mean_var/mean_var.cu)
add_nb_cuda_module(_sparse2dense_cuda src/rapids_singlecell/_cuda/sparse2dense/sparse2dense.cu)
add_nb_cuda_module(_scale_cuda src/rapids_singlecell/_cuda/scale/scale.cu)
add_nb_cuda_module(_qc_cuda src/rapids_singlecell/_cuda/qc/qc.cu)
add_nb_cuda_module(_qc_dask_cuda src/rapids_singlecell/_cuda/qc_dask/qc_kernels_dask.cu)
add_nb_cuda_module(_bbknn_cuda src/rapids_singlecell/_cuda/bbknn/bbknn.cu)
add_nb_cuda_module(_norm_cuda src/rapids_singlecell/_cuda/norm/norm.cu)
add_nb_cuda_module(_pr_cuda src/rapids_singlecell/_cuda/pr/pr.cu)
add_nb_cuda_module(_nn_descent_cuda src/rapids_singlecell/_cuda/nn_descent/nn_descent.cu)
add_nb_cuda_module(_aucell_cuda src/rapids_singlecell/_cuda/aucell/aucell.cu)
add_nb_cuda_module(_nanmean_cuda src/rapids_singlecell/_cuda/nanmean/nanmean.cu)
add_nb_cuda_module(_autocorr_cuda src/rapids_singlecell/_cuda/autocorr/autocorr.cu)
add_nb_cuda_module(_cooc_cuda src/rapids_singlecell/_cuda/cooc/cooc.cu)
add_nb_cuda_module(_aggr_cuda src/rapids_singlecell/_cuda/aggr/aggr.cu)
add_nb_cuda_module(_spca_cuda src/rapids_singlecell/_cuda/spca/spca.cu)
add_nb_cuda_module(_ligrec_cuda src/rapids_singlecell/_cuda/ligrec/ligrec.cu)
add_nb_cuda_module(_pv_cuda src/rapids_singlecell/_cuda/pv/pv.cu)
# Harmony CUDA modules (sources live under _cuda/harmony/<name>/)
add_nb_cuda_module(_harmony_scatter_cuda src/rapids_singlecell/_cuda/harmony/scatter/scatter.cu)
add_nb_cuda_module(_harmony_outer_cuda src/rapids_singlecell/_cuda/harmony/outer/outer.cu)
add_nb_cuda_module(_harmony_colsum_cuda src/rapids_singlecell/_cuda/harmony/colsum/colsum.cu)
add_nb_cuda_module(_harmony_kmeans_cuda src/rapids_singlecell/_cuda/harmony/kmeans/kmeans.cu)
add_nb_cuda_module(_harmony_normalize_cuda src/rapids_singlecell/_cuda/harmony/normalize/normalize.cu)
add_nb_cuda_module(_harmony_pen_cuda src/rapids_singlecell/_cuda/harmony/pen/pen.cu)
endif()
18 changes: 18 additions & 0 deletions docker/manylinux_2_28_aarch64_cuda12.9.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# manylinux_2_28 (aarch64) image extended with the CUDA 12.9 compiler and the
# cudart/cuBLAS/cuSPARSE packages; used as the cibuildwheel build image.
FROM quay.io/pypa/manylinux_2_28_aarch64

# Add NVIDIA's RHEL 8 CUDA repository (sbsa = Arm server) and install only the
# pinned 12.9 packages listed below, then drop the package caches.
RUN yum -y install dnf-plugins-core && \
    dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
    yum -y clean all && yum -y makecache && \
    yum -y install \
        cuda-nvcc-12-9 \
        cuda-cudart-12-9 \
        cuda-cudart-devel-12-9 \
        libcublas-12-9 \
        libcublas-devel-12-9 \
        libcusparse-12-9 \
        libcusparse-devel-12-9 && \
    yum clean all

ENV CUDA_HOME=/usr/local/cuda
# Append the inherited LD_LIBRARY_PATH only when it is non-empty. A plain
# ":${LD_LIBRARY_PATH}" would leave a trailing ':' if the base image does not
# set the variable, and an empty entry makes the loader search the current
# directory.
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
ENV PATH=/usr/local/cuda/bin:${PATH}
20 changes: 20 additions & 0 deletions docker/manylinux_2_28_x86_64_cuda12.9.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# manylinux_2_28 (x86_64) image extended with the CUDA 12.9 compiler and the
# cudart/cuBLAS/cuSPARSE packages; used as the cibuildwheel build image.
FROM quay.io/pypa/manylinux_2_28_x86_64

# Add NVIDIA CUDA repo (RHEL8/Alma8 base in manylinux_2_28) and install only
# the pinned 12.9 packages listed below, then drop the package caches.
RUN yum -y install dnf-plugins-core && \
    dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && \
    yum -y clean all && yum -y makecache && \
    yum -y install \
        cuda-nvcc-12-9 \
        cuda-cudart-12-9 \
        cuda-cudart-devel-12-9 \
        libcublas-12-9 \
        libcublas-devel-12-9 \
        libcusparse-12-9 \
        libcusparse-devel-12-9 && \
    yum clean all

ENV CUDA_HOME=/usr/local/cuda
# Append the inherited LD_LIBRARY_PATH only when it is non-empty. A plain
# ":${LD_LIBRARY_PATH}" would leave a trailing ':' if the base image does not
# set the variable, and an empty entry makes the loader search the current
# directory.
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
ENV PATH=/usr/local/cuda/bin:${PATH}
4 changes: 2 additions & 2 deletions docs/release-notes/0.13.3.md → docs/release-notes/0.14.0.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
### 0.13.3 {small}`the-future`
### 0.14.0 {small}`the-future`

```{rubric} Features
```

* Switch all `cupy.rawkernels` to a compiled CUDA extension built with nanobind {pr}`455` {smaller}`S Dicks & P Angerer`

```{rubric} Performance
```
Expand Down
6 changes: 4 additions & 2 deletions docs/release-notes/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

# Release notes

## Version 0.13.0
```{include} /release-notes/0.13.3.md
## Version 0.14.0
```{include} /release-notes/0.14.0.md
```

## Version 0.13.0
```{include} /release-notes/0.13.2.md
```
```{include} /release-notes/0.13.1.md
Expand Down
57 changes: 41 additions & 16 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
[build-system]
requires = [ "hatchling", "hatch-vcs" ]
build-backend = "hatchling.build"
requires = [
"scikit-build-core>=0.10",
"nanobind>=2.0.0",
"pybind11-stubgen",
"setuptools-scm>=8",
]
build-backend = "scikit_build_core.build"

[project]
name = "rapids_singlecell"
description = "running single cell analysis on Nvidia GPUs"
requires-python = ">=3.11, <3.14"
requires-python = ">=3.12, <3.14"
license = { file = "LICENSE" }
authors = [ { name = "Severin Dicks" } ]
readme = { file = "README.md", content-type = "text/markdown" }
Expand Down Expand Up @@ -107,21 +112,41 @@ markers = [
"gpu: tests that use a GPU (currently unused, but needs to be specified here as we import anndata.tests.helpers, which uses it)",
]

[tool.hatch.build]
# exclude big files that don’t need to be installed
exclude = [
"tests",
"docs",
"notebooks",
]
[tool.hatch.build.hooks.vcs]
version-file = "src/rapids_singlecell/_version.py"
[tool.setuptools_scm]
write_to = "src/rapids_singlecell/_version.py"
# Both values below are setuptools-scm's defaults; they are spelled out so the versioning scheme is explicit.
version_scheme = "guess-next-dev"
local_scheme = "node-and-date"

[tool.hatch.version]
source = "vcs"
[tool.scikit-build]
build-dir = "build"
experimental = false

# Build tool requirements and CMake configuration.
cmake.version = ">=3.24"
cmake.build-type = "Release"
# CUDA architectures passed to CMake as CMAKE_CUDA_ARCHITECTURES.
cmake.args = [ "-DCMAKE_CUDA_ARCHITECTURES=75;80;86;89;90;100;120" ]
ninja.version = ">=1.10"

# Take the package version from setuptools-scm and ship the version file it
# writes in the sdist.
metadata.version.provider = "scikit_build_core.metadata.setuptools_scm"
sdist.include = [ "src/rapids_singlecell/_version.py" ]

# Build limited-API (abi3) wheels targeting CPython 3.12, so one wheel per
# platform serves every supported Python minor version.
wheel.py-api = "cp312"
wheel.packages = [ "src/rapids_singlecell", "src/testing" ]

[tool.hatch.build.targets.wheel]
packages = [ 'src/rapids_singlecell', 'src/testing' ]
# Use abi3audit to catch issues with Limited API wheels
[tool.cibuildwheel.linux]
repair-wheel-command = [
"auditwheel repair -w {dest_dir} {wheel}",
"pipx run abi3audit --strict --report {wheel}",
]
[tool.cibuildwheel.macos]
repair-wheel-command = [
"delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel}",
"pipx run abi3audit --strict --report {wheel}",
]
[tool.cibuildwheel.windows]
repair-wheel-command = [
"copy {wheel} {dest_dir}",
"pipx run abi3audit --strict --report {wheel}",
]

[tool.codespell]
skip = '*.ipynb,*.csv'
Expand Down
3 changes: 3 additions & 0 deletions src/rapids_singlecell/_cuda/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from __future__ import annotations

# Subpackage for CUDA extensions (built via scikit-build-core/nanobind)
Loading
Loading