Skip to content

Commit 2fd4cc4

Browse files
authored
Meshlet texture atomics (#17765)
* Use texture atomics rather than buffer atomics for the visbuffer (haven't tested perf on a raster-heavy scene yet) * Unfortunately to clear the visbuffer we now need a compute pass to clear it. Using wgpu's clear_texture function internally uses a buffer -> image copy that's insanely expensive. Ideally it should be using vkCmdClearColorImage, which I've opened an issue for gfx-rs/wgpu#7090. For now we'll have to stick with a custom compute pass and all the extra code that brings. * Faster resolve depth pass by discarding 0 depth pixels instead of redundantly writing zero (2x faster for big depth textures like shadow views)
1 parent 2f9613f commit 2fd4cc4

12 files changed

+328
-131
lines changed

crates/bevy_core_pipeline/src/experimental/mip_generation/downsample_depth.wgsl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
2-
@group(0) @binding(0) var<storage, read> mip_0: array<u64>; // Per pixel
2+
@group(0) @binding(0) var mip_0: texture_storage_2d<r64uint, read>;
33
#else
44
#ifdef MESHLET
5-
@group(0) @binding(0) var<storage, read> mip_0: array<u32>; // Per pixel
5+
@group(0) @binding(0) var mip_0: texture_storage_2d<r32uint, read>;
66
#else // MESHLET
77
#ifdef MULTISAMPLE
88
@group(0) @binding(0) var mip_0: texture_depth_multisampled_2d;
@@ -24,7 +24,7 @@
2424
@group(0) @binding(11) var mip_11: texture_storage_2d<r32float, write>;
2525
@group(0) @binding(12) var mip_12: texture_storage_2d<r32float, write>;
2626
@group(0) @binding(13) var samplr: sampler;
27-
struct Constants { max_mip_level: u32, view_width: u32 }
27+
struct Constants { max_mip_level: u32 }
2828
var<push_constant> constants: Constants;
2929

3030
/// Generates a hierarchical depth buffer.
@@ -39,7 +39,6 @@ var<workgroup> intermediate_memory: array<array<f32, 16>, 16>;
3939
@compute
4040
@workgroup_size(256, 1, 1)
4141
fn downsample_depth_first(
42-
@builtin(num_workgroups) num_workgroups: vec3u,
4342
@builtin(workgroup_id) workgroup_id: vec3u,
4443
@builtin(local_invocation_index) local_invocation_index: u32,
4544
) {
@@ -309,12 +308,13 @@ fn reduce_load_mip_6(tex: vec2u) -> f32 {
309308
}
310309

311310
fn load_mip_0(x: u32, y: u32) -> f32 {
312-
let i = y * constants.view_width + x;
313311
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
314-
return bitcast<f32>(u32(mip_0[i] >> 32u));
312+
let visibility = textureLoad(mip_0, vec2(x, y)).r;
313+
return bitcast<f32>(u32(visibility >> 32u));
315314
#else // MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
316315
#ifdef MESHLET
317-
return bitcast<f32>(mip_0[i]);
316+
let visibility = textureLoad(mip_0, vec2(x, y)).r;
317+
return bitcast<f32>(visibility);
318318
#else // MESHLET
319319
// Downsample the top level.
320320
#ifdef MULTISAMPLE

crates/bevy_core_pipeline/src/experimental/mip_generation/mod.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ impl SpecializedComputePipeline for DownsampleDepthPipeline {
427427
layout: vec![self.bind_group_layout.clone()],
428428
push_constant_ranges: vec![PushConstantRange {
429429
stages: ShaderStages::COMPUTE,
430-
range: 0..8,
430+
range: 0..4,
431431
}],
432432
shader: DOWNSAMPLE_DEPTH_SHADER_HANDLE,
433433
shader_defs,
@@ -627,9 +627,8 @@ impl ViewDepthPyramid {
627627
timestamp_writes: None,
628628
});
629629
downsample_pass.set_pipeline(downsample_depth_first_pipeline);
630-
// Pass the mip count and the texture width as push constants, for
631-
// simplicity.
632-
downsample_pass.set_push_constants(0, bytemuck::cast_slice(&[self.mip_count, view_size.x]));
630+
// Pass the mip count as a push constant, for simplicity.
631+
downsample_pass.set_push_constants(0, &self.mip_count.to_le_bytes());
633632
downsample_pass.set_bind_group(0, downsample_depth_bind_group, &[]);
634633
downsample_pass.dispatch_workgroups(view_size.x.div_ceil(64), view_size.y.div_ceil(64), 1);
635634

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
2+
@group(0) @binding(0) var meshlet_visibility_buffer: texture_storage_2d<r64uint, write>;
3+
#else
4+
@group(0) @binding(0) var meshlet_visibility_buffer: texture_storage_2d<r32uint, write>;
5+
#endif
6+
var<push_constant> view_size: vec2<u32>;
7+
8+
@compute
9+
@workgroup_size(16, 16, 1)
10+
fn clear_visibility_buffer(@builtin(global_invocation_id) global_id: vec3<u32>) {
11+
if any(global_id.xy >= view_size) { return; }
12+
13+
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
14+
textureStore(meshlet_visibility_buffer, global_id.xy, vec4(0lu));
15+
#else
16+
textureStore(meshlet_visibility_buffer, global_id.xy, vec4(0u));
17+
#endif
18+
}

crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,9 @@ fn cluster_is_second_pass_candidate(cluster_id: u32) -> bool {
100100
@group(0) @binding(6) var<storage, read> meshlet_raster_clusters: array<u32>; // Single object shared between all workgroups
101101
@group(0) @binding(7) var<storage, read> meshlet_software_raster_cluster_count: u32;
102102
#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT
103-
@group(0) @binding(8) var<storage, read_write> meshlet_visibility_buffer: array<atomic<u64>>; // Per pixel
103+
@group(0) @binding(8) var meshlet_visibility_buffer: texture_storage_2d<r64uint, atomic>;
104104
#else
105-
@group(0) @binding(8) var<storage, read_write> meshlet_visibility_buffer: array<atomic<u32>>; // Per pixel
105+
@group(0) @binding(8) var meshlet_visibility_buffer: texture_storage_2d<r32uint, atomic>;
106106
#endif
107107
@group(0) @binding(9) var<uniform> view: View;
108108

@@ -149,7 +149,7 @@ fn get_meshlet_vertex_position(meshlet: ptr<function, Meshlet>, vertex_id: u32)
149149
#endif
150150

151151
#ifdef MESHLET_MESH_MATERIAL_PASS
152-
@group(1) @binding(0) var<storage, read> meshlet_visibility_buffer: array<u64>; // Per pixel
152+
@group(1) @binding(0) var meshlet_visibility_buffer: texture_storage_2d<r64uint, read>;
153153
@group(1) @binding(1) var<storage, read> meshlet_cluster_meshlet_ids: array<u32>; // Per cluster
154154
@group(1) @binding(2) var<storage, read> meshlets: array<Meshlet>; // Per meshlet
155155
@group(1) @binding(3) var<storage, read> meshlet_indices: array<u32>; // Many per meshlet

crates/bevy_pbr/src/meshlet/mod.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,9 @@ const MESHLET_MESH_MATERIAL_SHADER_HANDLE: Handle<Shader> =
106106
/// * Requires preprocessing meshes. See [`MeshletMesh`] for details.
107107
/// * Limitations on the kinds of materials you can use. See [`MeshletMesh`] for details.
108108
///
109-
/// This plugin requires a fairly recent GPU that supports [`WgpuFeatures::SHADER_INT64_ATOMIC_MIN_MAX`].
109+
/// This plugin requires a fairly recent GPU that supports [`WgpuFeatures::TEXTURE_INT64_ATOMIC`].
110110
///
111-
/// This plugin currently works only on the Vulkan backend.
111+
/// This plugin currently works only on the Vulkan and Metal backends.
112112
///
113113
/// This plugin is not compatible with [`Msaa`]. Any camera rendering a [`MeshletMesh`] must have
114114
/// [`Msaa`] set to [`Msaa::Off`].
@@ -133,7 +133,8 @@ pub struct MeshletPlugin {
133133
impl MeshletPlugin {
134134
/// [`WgpuFeatures`] required for this plugin to function.
135135
pub fn required_wgpu_features() -> WgpuFeatures {
136-
WgpuFeatures::SHADER_INT64_ATOMIC_MIN_MAX
136+
WgpuFeatures::TEXTURE_INT64_ATOMIC
137+
| WgpuFeatures::TEXTURE_ATOMIC
137138
| WgpuFeatures::SHADER_INT64
138139
| WgpuFeatures::SUBGROUP
139140
| WgpuFeatures::DEPTH_CLIP_CONTROL
@@ -151,6 +152,12 @@ impl Plugin for MeshletPlugin {
151152
std::process::exit(1);
152153
}
153154

155+
load_internal_asset!(
156+
app,
157+
MESHLET_CLEAR_VISIBILITY_BUFFER_SHADER_HANDLE,
158+
"clear_visibility_buffer.wgsl",
159+
Shader::from_wgsl
160+
);
154161
load_internal_asset!(
155162
app,
156163
MESHLET_BINDINGS_SHADER_HANDLE,

0 commit comments

Comments
 (0)