Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions crates/cuda_builder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -809,9 +809,6 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {
}
}

let arch = format!("{:?}0", builder.arch);
cargo.env("CUDA_ARCH", arch.strip_prefix("Compute").unwrap());

let cargo_encoded_rustflags = join_checking_for_separators(rustflags, "\x1f");

let build = cargo
Expand Down
19 changes: 7 additions & 12 deletions crates/cuda_std/src/atomic/mid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,14 @@
#![allow(dead_code, unused_imports)]

use super::intrinsics;
use crate::cfg::ComputeCapability;
use crate::gpu_only;
use core::sync::atomic::Ordering::{self, *};
use paste::paste;

/// Reports whether the compute capability obtained from
/// `ComputeCapability::from_cuda_arch_env` is `Compute70` or greater.
fn ge_sm70() -> bool {
    let detected = ComputeCapability::from_cuda_arch_env();
    detected >= ComputeCapability::Compute70
}

#[gpu_only]
pub fn device_thread_fence(ordering: Ordering) {
unsafe {
if ge_sm70() {
if cfg!(target_feature = "compute_70") {
if ordering == SeqCst {
return intrinsics::fence_sc_device();
}
Expand All @@ -38,7 +33,7 @@ pub fn device_thread_fence(ordering: Ordering) {
#[gpu_only]
pub fn block_thread_fence(ordering: Ordering) {
unsafe {
if ge_sm70() {
if cfg!(target_feature = "compute_70") {
if ordering == SeqCst {
return intrinsics::fence_sc_block();
}
Expand All @@ -57,7 +52,7 @@ pub fn block_thread_fence(ordering: Ordering) {
#[gpu_only]
pub fn system_thread_fence(ordering: Ordering) {
unsafe {
if ge_sm70() {
if cfg!(target_feature = "compute_70") {
if ordering == SeqCst {
return intrinsics::fence_sc_system();
}
Expand All @@ -80,7 +75,7 @@ macro_rules! load {
#[$crate::gpu_only]
#[allow(clippy::missing_safety_doc)]
pub unsafe fn [<atomic_load_ $width _ $scope>](ptr: *mut $type, ordering: Ordering) -> $type {
if ge_sm70() {
if cfg!(target_feature = "compute_70") {
match ordering {
SeqCst => {
intrinsics::[<fence_sc_ $scope>]();
Expand Down Expand Up @@ -136,7 +131,7 @@ macro_rules! store {
#[$crate::gpu_only]
#[allow(clippy::missing_safety_doc)]
pub unsafe fn [<atomic_store_ $width _ $scope>](ptr: *mut $type, ordering: Ordering, val: $type) {
if ge_sm70() {
if cfg!(target_feature = "compute_70") {
match ordering {
SeqCst => {
intrinsics::[<fence_sc_ $scope>]();
Expand Down Expand Up @@ -185,7 +180,7 @@ macro_rules! inner_fetch_ops_1_param {
#[$crate::gpu_only]
#[allow(clippy::missing_safety_doc)]
pub unsafe fn [<atomic_fetch_ $op _ $type _ $scope>](ptr: *mut $type, ordering: Ordering, val: $type) -> $type {
if ge_sm70() {
if cfg!(target_feature = "compute_70") {
match ordering {
SeqCst => {
intrinsics::[<fence_sc_ $scope>]();
Expand Down Expand Up @@ -259,7 +254,7 @@ macro_rules! inner_cas {
#[$crate::gpu_only]
#[allow(clippy::missing_safety_doc)]
pub unsafe fn [<atomic_compare_and_swap_ $type _ $scope>](ptr: *mut $type, current: $type, new: $type, ordering: Ordering) -> $type {
if ge_sm70() {
if cfg!(target_feature = "compute_70") {
match ordering {
SeqCst => {
intrinsics::[<fence_sc_ $scope>]();
Expand Down
48 changes: 0 additions & 48 deletions crates/cuda_std/src/cfg.rs

This file was deleted.

1 change: 0 additions & 1 deletion crates/cuda_std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ pub mod misc;
// WIP
// pub mod rt;
pub mod atomic;
pub mod cfg;
pub mod ptr;
pub mod shared;
pub mod thread;
Expand Down
26 changes: 1 addition & 25 deletions crates/cust/src/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,30 +34,6 @@ pub enum OptLevel {
O4 = 4,
}

/// The possible targets when JIT compiling a PTX module.
///
/// Each variant's discriminant is the two-digit number in its name
/// (for example `Compute70 as u32 == 70`), so the enum can be cast to `u32`
/// directly.
///
/// Variants are declared in ascending discriminant order, so the derived
/// ordering agrees with the numeric value: `Compute70 < Compute75`.
///
/// The enum is `#[non_exhaustive]`; code outside this crate must include a
/// wildcard arm when matching on it.
///
/// NOTE(review): the values presumably mirror the CUDA driver's
/// `CUjit_target` constants — confirm against the driver bindings.
#[non_exhaustive]
#[repr(u32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum JitTarget {
    Compute20 = 20,
    Compute21 = 21,
    Compute30 = 30,
    Compute32 = 32,
    Compute35 = 35,
    Compute37 = 37,
    Compute50 = 50,
    Compute52 = 52,
    Compute53 = 53,
    Compute60 = 60,
    Compute61 = 61,
    Compute62 = 62,
    Compute70 = 70,
    Compute72 = 72,
    Compute75 = 75,
    Compute80 = 80,
    Compute86 = 86,
}

/// How to handle cases where a loaded module's data does not contain an exact match for the
/// specified architecture.
#[repr(u32)]
Expand All @@ -82,7 +58,7 @@ pub enum ModuleJitOption {
/// [`ModuleJitOption::Target`].
DetermineTargetFromContext,
/// Specifies the target for the JIT compiler. Cannot be combined with [`ModuleJitOption::DetermineTargetFromContext`].
Target(JitTarget),
Target(driver_sys::CUjit_target),
/// Specifies how to handle cases where a loaded module's data does not have an exact match for the specified
/// architecture.
Fallback(JitFallback),
Expand Down
Loading
Loading