Skip to content

Commit 4b95ba4

Browse files
committed
Merge remote-tracking branch 'leofang/cluster' into HEAD
2 parents b8004e9 + 2c3a619 commit 4b95ba4

File tree

16 files changed

+220
-69
lines changed

16 files changed

+220
-69
lines changed

.github/actions/test/action.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@ runs:
1414
shell: bash --noprofile --norc -xeuo pipefail {0}
1515
run: nvidia-smi
1616

17+
# The cache action needs this
18+
- name: Install zstd
19+
shell: bash --noprofile --norc -xeuo pipefail {0}
20+
run: |
21+
apt update
22+
apt install zstd
23+
1724
- name: Download bindings build artifacts
1825
uses: actions/download-artifact@v4
1926
with:

.github/workflows/ci-gh.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ jobs:
2626
upload-enabled:
2727
- false
2828
python-version:
29+
- "3.13"
2930
- "3.12"
3031
- "3.11"
3132
- "3.10"

.github/workflows/gh-build-and-test.yml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,17 +76,19 @@ jobs:
7676
test:
7777
# TODO: improve the name once a separate test matrix is defined
7878
name: Test (CUDA ${{ inputs.cuda-version }})
79-
# TODO: enable testing once linux-aarch64 & win-64 GPU runners are up
79+
# TODO: enable testing once win-64 GPU runners are up
8080
if: ${{ (github.repository_owner == 'nvidia') &&
81-
startsWith(inputs.host-platform, 'linux-x64') }}
81+
startsWith(inputs.host-platform, 'linux') }}
8282
permissions:
8383
id-token: write # This is required for configure-aws-credentials
8484
contents: read # This is required for actions/checkout
85-
runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') }}
86-
# TODO: use a different (nvidia?) container, or just run on bare image
85+
runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') ||
86+
(inputs.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') }}
87+
# Our self-hosted runners require a container
88+
# TODO: use a different (nvidia?) container
8789
container:
8890
options: -u root --security-opt seccomp=unconfined --privileged --shm-size 16g
89-
image: condaforge/miniforge3:latest
91+
image: ubuntu:22.04
9092
env:
9193
NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
9294
needs:

cuda_core/cuda/core/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
#
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44

5-
__version__ = "0.1.0"
5+
__version__ = "0.1.1"

cuda_core/cuda/core/experimental/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,8 @@
99
from cuda.core.experimental._linker import Linker, LinkerOptions
1010
from cuda.core.experimental._program import Program
1111
from cuda.core.experimental._stream import Stream, StreamOptions
12+
from cuda.core.experimental._system import System
13+
14+
system = System()
15+
__import__("sys").modules[__spec__.name + ".system"] = system
16+
del System

cuda_core/cuda/core/experimental/_linker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,7 @@ def link(self, target_type) -> ObjectCode:
443443
return ObjectCode(bytes(code), target_type)
444444

445445
def get_error_log(self) -> str:
446-
""" Get the error log generated by the linker.
446+
"""Get the error log generated by the linker.
447447
448448
Returns
449449
-------

cuda_core/cuda/core/experimental/_module.py

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import importlib.metadata
66

77
from cuda import cuda
8-
from cuda.core.experimental._utils import handle_return
8+
from cuda.core.experimental._utils import handle_return, precondition
99

1010
_backend = {
1111
"old": {
@@ -106,30 +106,43 @@ class ObjectCode:
106106
107107
"""
108108

109-
__slots__ = ("_handle", "_code_type", "_module", "_loader", "_sym_map")
109+
__slots__ = ("_handle", "_backend_version", "_jit_options", "_code_type", "_module", "_loader", "_sym_map")
110110
_supported_code_type = ("cubin", "ptx", "ltoir", "fatbin")
111111

112112
def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None):
113113
if code_type not in self._supported_code_type:
114114
raise ValueError
115115
_lazy_init()
116+
117+
# handle is assigned during _lazy_load
116118
self._handle = None
119+
self._jit_options = jit_options
120+
121+
self._backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
122+
self._loader = _backend[self._backend_version]
123+
124+
self._code_type = code_type
125+
self._module = module
126+
self._sym_map = {} if symbol_mapping is None else symbol_mapping
117127

118-
backend = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
119-
self._loader = _backend[backend]
128+
# TODO: do we want to unload in a finalizer? Probably not..
120129

130+
def _lazy_load_module(self, *args, **kwargs):
131+
if self._handle is not None:
132+
return
133+
jit_options = self._jit_options
134+
module = self._module
121135
if isinstance(module, str):
122136
# TODO: this option is only taken by the new library APIs, but we have
123137
# a bug that we can't easily support it just yet (NVIDIA/cuda-python#73).
124138
if jit_options is not None:
125139
raise ValueError
126-
module = module.encode()
127140
self._handle = handle_return(self._loader["file"](module))
128141
else:
129142
assert isinstance(module, bytes)
130143
if jit_options is None:
131144
jit_options = {}
132-
if backend == "new":
145+
if self._backend_version == "new":
133146
args = (
134147
module,
135148
list(jit_options.keys()),
@@ -141,15 +154,15 @@ def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None):
141154
0,
142155
)
143156
else: # "old" backend
144-
args = (module, len(jit_options), list(jit_options.keys()), list(jit_options.values()))
157+
args = (
158+
module,
159+
len(jit_options),
160+
list(jit_options.keys()),
161+
list(jit_options.values()),
162+
)
145163
self._handle = handle_return(self._loader["data"](*args))
146164

147-
self._code_type = code_type
148-
self._module = module
149-
self._sym_map = {} if symbol_mapping is None else symbol_mapping
150-
151-
# TODO: do we want to unload in a finalizer? Probably not..
152-
165+
@precondition(_lazy_load_module)
153166
def get_kernel(self, name):
154167
"""Return the :obj:`Kernel` of a specified name from this object code.
155168
@@ -168,6 +181,7 @@ def get_kernel(self, name):
168181
name = self._sym_map[name]
169182
except KeyError:
170183
name = name.encode()
184+
171185
data = handle_return(self._loader["kernel"](self._handle, name))
172186
return Kernel._from_obj(data, self)
173187

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
2+
#
3+
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
4+
5+
from typing import Tuple
6+
7+
from cuda import cuda, cudart
8+
from cuda.core.experimental._device import Device
9+
from cuda.core.experimental._utils import handle_return
10+
11+
12+
class System:
    """Provide information about the cuda system.
    This class is a singleton and should not be instantiated directly.
    """

    # The one shared instance; lazily created on first construction.
    _instance = None

    def __new__(cls):
        # Create the singleton the first time System() is called; every
        # subsequent call hands back the same object.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on *every* System() call, so bail out once the
        # shared instance has already been set up.
        if getattr(self, "_initialized", False):
            return
        self._initialized = True

    @property
    def driver_version(self) -> Tuple[int, int]:
        """
        Query the CUDA driver version.

        Returns
        -------
        tuple of int
            A 2-tuple of (major, minor) version numbers.
        """
        # The driver encodes its version as 1000 * major + 10 * minor,
        # so dropping the last digit and splitting by 100 recovers both.
        raw = handle_return(cuda.cuDriverGetVersion())
        return divmod(raw // 10, 100)

    @property
    def num_devices(self) -> int:
        """
        Query the number of available GPUs.

        Returns
        -------
        int
            The number of available GPU devices.
        """
        return handle_return(cudart.cudaGetDeviceCount())

    @property
    def devices(self) -> tuple:
        """
        Query the available device instances.

        Returns
        -------
        tuple of Device
            A tuple containing instances of available devices.
        """
        return tuple(map(Device, range(self.num_devices)))

cuda_core/docs/source/api.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,17 @@ CUDA compilation toolchain
3838
LinkerOptions
3939

4040

41+
CUDA system information
42+
-----------------------
43+
44+
.. autodata:: cuda.core.experimental.system.driver_version
45+
:no-value:
46+
.. autodata:: cuda.core.experimental.system.num_devices
47+
:no-value:
48+
.. autodata:: cuda.core.experimental.system.devices
49+
:no-value:
50+
51+
4152
.. module:: cuda.core.experimental.utils
4253

4354
Utility functions

cuda_core/docs/source/conf.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,31 @@
9191

9292
napoleon_google_docstring = False
9393
napoleon_numpy_docstring = True
94+
95+
96+
# NumPy-style section titles that the hook below knows how to reformat.
section_titles = ["Returns"]


def autodoc_process_docstring(app, what, name, obj, options, lines):
    """Patch the docstrings of ``cuda.core.experimental.system`` attributes.

    The ``system`` singleton is documented via ``autodata``, so autodoc does
    not pick up the property docstrings defined on the ``System`` class.  This
    hook fetches the docstring from the matching ``System`` attribute and
    rewrites NumPy-style section titles into rubrics (plain section headers
    are not valid reST outside an autodoc'ed function/method).  Should
    docstrings grow section titles other than "Returns", ``section_titles``
    must be extended to handle them.

    Parameters follow the Sphinx ``autodoc-process-docstring`` event API;
    ``lines`` must be mutated *in place* for Sphinx to see the result.
    """
    if not name.startswith("cuda.core.experimental.system"):
        return
    attr = name.split(".")[-1]
    from cuda.core.experimental._system import System

    formatted_lines = []
    for line in getattr(System, attr).__doc__.split("\n"):
        title = line.strip()
        if title in section_titles:
            # Turn the section title into a rubric directive.
            formatted_lines.append(line.replace(title, f".. rubric:: {title}"))
        elif title == "-" * len(title):
            # Blank out the dashed underline that followed a section title.
            formatted_lines.append(" " * len(title))
        else:
            formatted_lines.append(line)
    # Slice assignment replaces the content while keeping the same list
    # object — simpler and O(n) versus the extend-then-pop(0) loop it
    # replaces, which shifted the whole list on every front pop.
    lines[:] = formatted_lines
118+
119+
120+
def setup(app):
    """Sphinx extension hook: wire the docstring processor into autodoc."""
    app.connect("autodoc-process-docstring", autodoc_process_docstring)

0 commit comments

Comments
 (0)