Skip to content

Replace vek with glam. #180

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/cuda_std/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ repository = "https://github.com/Rust-GPU/Rust-CUDA"
readme = "../../README.md"

[dependencies]
glam = { version = ">=0.22", default-features = false, features = ["libm", "cuda", "bytemuck"] }
vek = { version = "0.17.1", default-features = false, features = ["libm"] }
cuda_std_macros = { version = "0.2", path = "../cuda_std_macros" }
half = "2.4.1"
Expand Down
2 changes: 2 additions & 0 deletions crates/cuda_std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ mod float_ext;
pub use cuda_std_macros::*;
pub use float::GpuFloat;
pub use float_ext::*;
pub use glam;
pub use half;
#[deprecated(note = "The `vek` module is deprecated, use `glam` instead.")]
pub use vek;

pub use half::{bf16, f16};
Expand Down
32 changes: 16 additions & 16 deletions crates/cuda_std/src/rt/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,23 +152,23 @@ impl<'a> From<&'a GridSize> for GridSize {
other.clone()
}
}
/// Converts a two-component `u32` vector into a [`GridSize`].
///
/// The `x` and `y` components are forwarded unchanged to [`GridSize::xy`].
impl From<vek::Vec2<u32>> for GridSize {
fn from(vec: vek::Vec2<u32>) -> Self {
impl From<glam::UVec2> for GridSize {
fn from(vec: glam::UVec2) -> Self {
GridSize::xy(vec.x, vec.y)
}
}
/// Converts a three-component `u32` vector into a [`GridSize`].
///
/// The `x`, `y` and `z` components are forwarded unchanged to [`GridSize::xyz`].
impl From<vek::Vec3<u32>> for GridSize {
fn from(vec: vek::Vec3<u32>) -> Self {
impl From<glam::UVec3> for GridSize {
fn from(vec: glam::UVec3) -> Self {
GridSize::xyz(vec.x, vec.y, vec.z)
}
}
/// Converts a two-component `usize` vector into a [`GridSize`] via [`GridSize::xy`].
///
/// Each component is narrowed with `as u32`. Where `usize` is wider than 32 bits, a value
/// above `u32::MAX` is therefore truncated silently rather than rejected.
impl From<vek::Vec2<usize>> for GridSize {
fn from(vec: vek::Vec2<usize>) -> Self {
impl From<glam::USizeVec2> for GridSize {
fn from(vec: glam::USizeVec2) -> Self {
GridSize::xy(vec.x as u32, vec.y as u32)
}
}
/// Converts a three-component `usize` vector into a [`GridSize`] via [`GridSize::xyz`].
///
/// Each component is narrowed with `as u32`. Where `usize` is wider than 32 bits, a value
/// above `u32::MAX` is therefore truncated silently rather than rejected.
impl From<vek::Vec3<usize>> for GridSize {
fn from(vec: vek::Vec3<usize>) -> Self {
impl From<glam::USizeVec3> for GridSize {
fn from(vec: glam::USizeVec3) -> Self {
GridSize::xyz(vec.x as u32, vec.y as u32, vec.z as u32)
}
}
Expand Down Expand Up @@ -228,23 +228,23 @@ impl<'a> From<&'a BlockSize> for BlockSize {
other.clone()
}
}
/// Converts a two-component `u32` vector into a [`BlockSize`].
///
/// The `x` and `y` components are forwarded unchanged to [`BlockSize::xy`].
impl From<vek::Vec2<u32>> for BlockSize {
fn from(vec: vek::Vec2<u32>) -> Self {
impl From<glam::UVec2> for BlockSize {
fn from(vec: glam::UVec2) -> Self {
BlockSize::xy(vec.x, vec.y)
}
}
/// Converts a three-component `u32` vector into a [`BlockSize`].
///
/// The `x`, `y` and `z` components are forwarded unchanged to [`BlockSize::xyz`].
impl From<vek::Vec3<u32>> for BlockSize {
fn from(vec: vek::Vec3<u32>) -> Self {
impl From<glam::UVec3> for BlockSize {
fn from(vec: glam::UVec3) -> Self {
BlockSize::xyz(vec.x, vec.y, vec.z)
}
}
/// Converts a two-component `usize` vector into a [`BlockSize`] via [`BlockSize::xy`].
///
/// Each component is narrowed with `as u32`. Where `usize` is wider than 32 bits, a value
/// above `u32::MAX` is therefore truncated silently rather than rejected.
impl From<vek::Vec2<usize>> for BlockSize {
fn from(vec: vek::Vec2<usize>) -> Self {
impl From<glam::USizeVec2> for BlockSize {
fn from(vec: glam::USizeVec2) -> Self {
BlockSize::xy(vec.x as u32, vec.y as u32)
}
}
/// Converts a three-component `usize` vector into a [`BlockSize`] via [`BlockSize::xyz`].
///
/// Each component is narrowed with `as u32`. Where `usize` is wider than 32 bits, a value
/// above `u32::MAX` is therefore truncated silently rather than rejected.
impl From<vek::Vec3<usize>> for BlockSize {
fn from(vec: vek::Vec3<usize>) -> Self {
impl From<glam::USizeVec3> for BlockSize {
fn from(vec: glam::USizeVec3) -> Self {
BlockSize::xyz(vec.x as u32, vec.y as u32, vec.z as u32)
}
}
20 changes: 10 additions & 10 deletions crates/cuda_std/src/thread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
// TODO: write some docs about the terms used in this module.

use cuda_std_macros::gpu_only;
use vek::{Vec2, Vec3};
use glam::{UVec2, UVec3};

// Different calling conventions don't exist in nvptx, so we just use C as a placeholder.
extern "C" {
Expand Down Expand Up @@ -152,7 +152,7 @@ pub fn grid_dim_z() -> u32 {
/// Gets the 3d index of the thread currently executing the kernel.
#[gpu_only]
#[inline(always)]
pub fn thread_idx() -> Vec3<u32> {
pub fn thread_idx() -> UVec3 {
unsafe {
Vec3::new(
__nvvm_thread_idx_x(),
Expand All @@ -165,7 +165,7 @@ pub fn thread_idx() -> Vec3<u32> {
/// Gets the 3d index of the block that the thread currently executing the kernel is located in.
#[gpu_only]
#[inline(always)]
pub fn block_idx() -> Vec3<u32> {
pub fn block_idx() -> UVec3 {
unsafe {
Vec3::new(
__nvvm_block_idx_x(),
Expand All @@ -179,7 +179,7 @@ pub fn block_idx() -> Vec3<u32> {
/// how many threads exist in each thread block in every direction.
#[gpu_only]
#[inline(always)]
pub fn block_dim() -> Vec3<u32> {
pub fn block_dim() -> UVec3 {
unsafe {
Vec3::new(
__nvvm_block_dim_x(),
Expand All @@ -193,7 +193,7 @@ pub fn block_dim() -> Vec3<u32> {
/// how many thread blocks exist in each grid in every direction.
#[gpu_only]
#[inline(always)]
pub fn grid_dim() -> Vec3<u32> {
pub fn grid_dim() -> UVec3 {
unsafe {
Vec3::new(
__nvvm_grid_dim_x(),
Expand Down Expand Up @@ -232,26 +232,26 @@ pub fn index_1d() -> u32 {
}

/// Returns a 2D index for the calling thread.
///
/// Each component is computed as `thread_idx + block_idx * block_dim` along its axis
/// (`x` for the first component, `y` for the second), using `u32` arithmetic.
#[inline(always)]
pub fn index_2d() -> Vec2<u32> {
pub fn index_2d() -> UVec2 {
let i = thread_idx_x() + block_idx_x() * block_dim_x();
let j = thread_idx_y() + block_idx_y() * block_dim_y();
Vec2::new(i, j)
UVec2::new(i, j)
}

/// Returns a 3D index for the calling thread.
///
/// Each component is computed as `thread_idx + block_idx * block_dim` along its axis
/// (`x`, `y` and `z` in that order), using `u32` arithmetic.
#[inline(always)]
pub fn index_3d() -> Vec3<u32> {
pub fn index_3d() -> UVec3 {
let i = thread_idx_x() + block_idx_x() * block_dim_x();
let j = thread_idx_y() + block_idx_y() * block_dim_y();
let k = thread_idx_z() + block_idx_z() * block_dim_z();
Vec3::new(i, j, k)
UVec3::new(i, j, k)
}

/// Reports whether the calling thread is the first thread, i.e. the one whose block index and
/// thread index are both zero in every dimension. This is not the same as the first thread to
/// start executing.
///
/// This function is guaranteed to only return true in a single thread that is invoking it,
/// which makes it useful for doing something only once.
#[inline(always)]
pub fn first() -> bool {
block_idx() == Vec3::zero() && thread_idx() == Vec3::zero()
block_idx() == UVec3::ZERO && thread_idx() == UVec3::ZERO
}

/// Gets the number of threads inside of a warp. Currently 32 threads on every GPU architecture.
Expand Down
1 change: 1 addition & 0 deletions crates/cust/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Notable changes to this project will be documented in this file.

## Unreleased

- `cuda_std::vek` is now deprecated. Use `cuda_std::glam`.
- Add `memory::memcpy_dtoh` to allow copying from device to host.
- `DeviceSlice` is represented as a slice again, but as `[()]` instead of `[T]`.
- Reimplemented `Index` and `IndexMut` for `DeviceSlice` and removed `DeviceSlice::index`.
Expand Down
9 changes: 7 additions & 2 deletions crates/optix_device/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,17 @@
name = "optix_device"
version = "0.1.0"
edition = "2021"
authors = ["Anders Langlands <[email protected]>", "Riccardo D'Ambrosio <[email protected]>"]
authors = [
"Anders Langlands <[email protected]>",
"Riccardo D'Ambrosio <[email protected]>"
]

[dependencies]
bitflags = "2.8"
cuda_std = { version = "0.2", path = "../cuda_std" }
glam = { version = "0.29", features=["cuda", "libm"], default-features=false }
paste = "1.0.15"
seq-macro = "0.3.5"
cust_core = { version = "0.1", path = "../cust_core" }

[target.'cfg(not(target_os = "cuda"))'.dependencies]
glam = { version = "0.29", features = ["cuda"], default-features = false }
2 changes: 1 addition & 1 deletion crates/optix_device/src/hit.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#[cfg(target_os = "cuda")]
use core::arch::asm;
use cuda_std::gpu_only;
use cuda_std::{glam, gpu_only};
use glam::Vec3;
/// The type of primitive that a ray hit.
#[repr(u32)]
Expand Down
3 changes: 1 addition & 2 deletions crates/optix_device/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ pub mod trace;
pub mod transform;
pub mod util;

use cuda_std::*;
pub use glam;
use cuda_std::{glam, *};
use glam::UVec3;
pub use misc::*;

Expand Down
2 changes: 1 addition & 1 deletion crates/optix_device/src/ray.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::trace::*;
#[cfg(target_os = "cuda")]
use core::arch::asm;
use cuda_std::gpu_only;
use cuda_std::{glam, gpu_only};
use glam::Vec3;

/// Returns the ray origin that was passed into [`trace`] in world-space.
Expand Down
2 changes: 1 addition & 1 deletion crates/optix_device/src/sys.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use crate::trace::{RayFlags, TraversableHandle};
#[cfg(target_os = "cuda")]
use core::arch::asm;
use cuda_std::gpu_only;
use cuda_std::{glam, gpu_only};
use glam::Vec3;
use paste::paste;

Expand Down
4 changes: 2 additions & 2 deletions examples/cuda/cpu/path_tracer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ version = "0.1.0"
edition = "2018"

[dependencies]
vek = { version = "0.17.1", features = ["bytemuck", "mint"] }
glam = { version = "0.30.1", features = ["bytemuck", "cuda"] }
bytemuck = { version = "1.21", features = ["derive"] }
cust = { version = "0.3", path = "../../../../crates/cust", features = ["impl_vek"] }
cust = { version = "0.3", path = "../../../../crates/cust", features = ["impl_glam"] }
image = "0.25.5"
path_tracer_gpu = { path = "../../gpu/path_tracer_gpu" }
gpu_rand = { version = "0.1", path = "../../../../crates/gpu_rand" }
Expand Down
10 changes: 5 additions & 5 deletions examples/cuda/cpu/path_tracer/src/common.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
use glam::{Vec2, Vec3};
use glium::glutin::event::{
ElementState, Event, MouseButton, MouseScrollDelta, VirtualKeyCode, WindowEvent,
};
use path_tracer_gpu::Viewport;
use vek::{Vec2, Vec3};

/// Plain-data camera description: three vector fields (`origin`, `lookat`, `vup`) plus the
/// scalar `fov` and `aspect_ratio`.
///
/// NOTE(review): the field names suggest a look-at style camera (eye position, target point,
/// up vector), and the units of `fov` are not visible here. Confirm both against the code
/// that builds the viewport from this struct.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Camera {
pub origin: Vec3<f32>,
pub lookat: Vec3<f32>,
pub vup: Vec3<f32>,
pub origin: Vec3,
pub lookat: Vec3,
pub vup: Vec3,
pub fov: f32,
pub aspect_ratio: f32,
}
Expand Down Expand Up @@ -43,7 +43,7 @@ pub struct CameraController {
}

impl CameraController {
pub fn new(dimensions: Vec2<usize>) -> Self {
pub fn new(dimensions: USizeVec2) -> Self {
CameraController {
sensitivity: 0.1,
last_mouse_pos: dimensions.numcast().unwrap() / 2.0,
Expand Down
8 changes: 4 additions & 4 deletions examples/cuda/cpu/path_tracer/src/cpu/mod.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
use std::time::Duration;

use glam::{Clamp, Vec2, Vec3};
use gpu_rand::{DefaultRand, GpuRand};
use imgui::Ui;
use path_tracer_gpu::{
material::MaterialKind, render::generate_ray, scene::Scene, Object, Viewport,
};
use rayon::prelude::*;
use sysinfo::System;
use vek::{Clamp, Vec2, Vec3};

use crate::{common::Camera, cuda::SEED};

pub struct CpuRenderer {
// These are basically the same buffers the CUDA renderer uses, except they are not GPU buffers.
accumulated_buffer: Vec<Vec3<f32>>,
accumulated_buffer: Vec<Vec3>,
out_buffer: Vec<Vec3<u8>>,

viewport: Viewport,
Expand All @@ -23,7 +23,7 @@ pub struct CpuRenderer {
}

impl CpuRenderer {
pub fn new(dimensions: Vec2<usize>, camera: &Camera, scene: &Scene) -> Self {
pub fn new(dimensions: USizeVec2, camera: &Camera, scene: &Scene) -> Self {
let accumulated_buffer = vec![Vec3::zero(); dimensions.product()];
let out_buffer = vec![Vec3::zero(); dimensions.product()];

Expand Down Expand Up @@ -67,7 +67,7 @@ impl CpuRenderer {
new_camera.as_viewport(&mut self.viewport);
}

pub fn resize(&mut self, dimensions: Vec2<usize>) {
pub fn resize(&mut self, dimensions: USizeVec2) {
self.accumulated_buffer
.resize(dimensions.product(), Vec3::zero());
self.out_buffer.resize(dimensions.product(), Vec3::zero());
Expand Down
2 changes: 1 addition & 1 deletion examples/cuda/cpu/path_tracer/src/cuda/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ use cust::{
memory::{DeviceBuffer, DeviceCopy, UnifiedBuffer},
util::SliceExt,
};
use glam::{Vec2, Vec3};
use gpu_rand::DefaultRand;
use path_tracer_gpu::{material::MaterialKind, scene::Scene, Object, Viewport};
use vek::{Vec2, Vec3};

use super::SEED;

Expand Down
2 changes: 1 addition & 1 deletion examples/cuda/cpu/path_tracer/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ pub mod renderer;
pub mod viewer;

use common::Camera;
use glam::Vec3;
use path_tracer_gpu::{
material::{DielectricMaterial, DiffuseMaterial, MaterialKind, MetallicMaterial},
scene::Scene,
sphere::Sphere,
Object,
};
use std::error::Error;
use vek::Vec3;

pub const WIDTH: u32 = 1920;
pub const HEIGHT: u32 = 1080;
Expand Down
9 changes: 4 additions & 5 deletions examples/cuda/gpu/path_tracer_gpu/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,19 @@ pub mod render_kernels;
pub mod scene;
pub mod sphere;

pub use cuda_std::vek;
pub use cuda_std::glam;
use cust_core::DeviceCopy;
use enum_dispatch::enum_dispatch;
use hittable::{HitRecord, Hittable};
use sphere::Sphere;

pub type Vec3<T = f32> = vek::Vec3<T>;
pub type Point<T = f32> = vek::Vec3<T>;
pub type Vec2<T = f32> = vek::Vec2<T>;
use glam::{USizeVec2, Vec2, Vec3};
pub type Point = Vec3;

#[derive(Default, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct Viewport {
pub bounds: vek::Vec2<usize>,
pub bounds: USizeVec2,
pub lower_left: Vec3,
pub horizontal: Vec3,
pub vertical: Vec3,
Expand Down
2 changes: 1 addition & 1 deletion examples/cuda/gpu/path_tracer_gpu/src/render.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pub fn color(ray: Ray) -> Vec3 {
(1.0 - t) * Vec3::one() + t * Vec3::new(0.5, 0.7, 1.0)
}

pub fn generate_ray(idx: vek::Vec2<u32>, view: &Viewport, offset: Vec2) -> Ray {
pub fn generate_ray(idx: UVec2, view: &Viewport, offset: Vec2) -> Ray {
let uv = (idx.numcast::<f32>().unwrap() + offset) / view.bounds.numcast().unwrap();
Ray {
origin: view.origin,
Expand Down
7 changes: 4 additions & 3 deletions examples/cuda/gpu/path_tracer_gpu/src/render_kernels.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::{render::*, scene::Scene, *};
use cuda_std::{vek::Clamp, *};
use cuda_std::*;
use glam::{U8Vec3, Vec2, Vec3};
use gpu_rand::{DefaultRand, GpuRand};

#[kernel]
Expand Down Expand Up @@ -38,7 +39,7 @@ pub unsafe fn scale_buffer(fb: *const Vec3, out: *mut Vec3, samples: u32, view:

/// Postprocesses a (scaled) buffer into a final u8 buffer.
#[kernel]
pub unsafe fn postprocess(fb: *const Vec3, out: *mut vek::Vec3<u8>, view: Viewport) {
pub unsafe fn postprocess(fb: *const Vec3, out: *mut U8Vec3, view: Viewport) {
let idx_2d = thread::index_2d();
if idx_2d.x >= view.bounds.x as u32 || idx_2d.y >= view.bounds.y as u32 {
return;
Expand All @@ -50,7 +51,7 @@ pub unsafe fn postprocess(fb: *const Vec3, out: *mut vek::Vec3<u8>, view: Viewpo
let gamma_corrected = original.sqrt();

*out = (gamma_corrected * 255.0)
.clamped(Vec3::zero(), Vec3::broadcast(255.0))
.clamp(Vec3::zero(), Vec3::broadcast(255.0))
.numcast()
.unwrap();
}
Loading