Skip to content

Commit ad011d0

Browse files
JMS55superdumpmockersfteoxoyIceSentry
authored
Add GpuArrayBuffer and BatchedUniformBuffer (#8204)
# Objective - Add a type for uploading a Rust `Vec<T>` to a GPU `array<T>`. - Makes progress towards #89. ## Solution - Port @superdump's `BatchedUniformBuffer` to bevy main, as a fallback for WebGL2, which doesn't support storage buffers. - Rather than getting an `array<T>` in a shader, you get an `array<T, N>`, and have to rebind every N elements via dynamic offsets. - Add `GpuArrayBuffer` to abstract over `StorageBuffer<Vec<T>>`/`BatchedUniformBuffer`. ## Future Work Add a shader macro kinda thing to abstract over the following automatically: #8204 (review) --- ## Changelog * Added `GpuArrayBuffer`, `GpuComponentArrayBufferPlugin`, `GpuArrayBufferable`, and `GpuArrayBufferIndex` types. * Added `DynamicUniformBuffer::new_with_alignment()`. --------- Co-authored-by: Robert Swain <[email protected]> Co-authored-by: François <[email protected]> Co-authored-by: Teodor Tanasoaia <[email protected]> Co-authored-by: IceSentry <[email protected]> Co-authored-by: Vincent <[email protected]> Co-authored-by: robtfm <[email protected]>
1 parent 264195e commit ad011d0

File tree

8 files changed

+359
-1
lines changed

8 files changed

+359
-1
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
use crate::{
2+
render_resource::{GpuArrayBuffer, GpuArrayBufferable},
3+
renderer::{RenderDevice, RenderQueue},
4+
Render, RenderApp, RenderSet,
5+
};
6+
use bevy_app::{App, Plugin};
7+
use bevy_ecs::{
8+
prelude::{Component, Entity},
9+
schedule::IntoSystemConfigs,
10+
system::{Commands, Query, Res, ResMut},
11+
};
12+
use std::marker::PhantomData;
13+
14+
/// This plugin prepares the components of the corresponding type for the GPU
15+
/// by storing them in a [`GpuArrayBuffer`].
16+
pub struct GpuComponentArrayBufferPlugin<C: Component + GpuArrayBufferable>(PhantomData<C>);
17+
18+
impl<C: Component + GpuArrayBufferable> Plugin for GpuComponentArrayBufferPlugin<C> {
19+
fn build(&self, app: &mut App) {
20+
if let Ok(render_app) = app.get_sub_app_mut(RenderApp) {
21+
render_app
22+
.insert_resource(GpuArrayBuffer::<C>::new(
23+
render_app.world.resource::<RenderDevice>(),
24+
))
25+
.add_systems(
26+
Render,
27+
prepare_gpu_component_array_buffers::<C>.in_set(RenderSet::Prepare),
28+
);
29+
}
30+
}
31+
}
32+
33+
impl<C: Component + GpuArrayBufferable> Default for GpuComponentArrayBufferPlugin<C> {
34+
fn default() -> Self {
35+
Self(PhantomData::<C>)
36+
}
37+
}
38+
39+
fn prepare_gpu_component_array_buffers<C: Component + GpuArrayBufferable>(
40+
mut commands: Commands,
41+
render_device: Res<RenderDevice>,
42+
render_queue: Res<RenderQueue>,
43+
mut gpu_array_buffer: ResMut<GpuArrayBuffer<C>>,
44+
components: Query<(Entity, &C)>,
45+
) {
46+
gpu_array_buffer.clear();
47+
48+
let entities = components
49+
.iter()
50+
.map(|(entity, component)| (entity, gpu_array_buffer.push(component.clone())))
51+
.collect::<Vec<_>>();
52+
commands.insert_or_spawn_batch(entities);
53+
54+
gpu_array_buffer.write_buffer(&render_device, &render_queue);
55+
}

crates/bevy_render/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ pub mod extract_component;
1111
mod extract_param;
1212
pub mod extract_resource;
1313
pub mod globals;
14+
pub mod gpu_component_array_buffer;
1415
pub mod mesh;
1516
pub mod pipelined_rendering;
1617
pub mod primitives;
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
use super::{GpuArrayBufferIndex, GpuArrayBufferable};
2+
use crate::{
3+
render_resource::DynamicUniformBuffer,
4+
renderer::{RenderDevice, RenderQueue},
5+
};
6+
use encase::{
7+
private::{ArrayMetadata, BufferMut, Metadata, RuntimeSizedArray, WriteInto, Writer},
8+
ShaderType,
9+
};
10+
use std::{marker::PhantomData, num::NonZeroU64};
11+
use wgpu::{BindingResource, Limits};
12+
13+
// Cap of 1MB for every target except WebGL2. macOS reports a very large
// `max_uniform_buffer_binding_size`, and without a cap we would make really
// large arrays there. On macOS this value ends up being the minimum size of
// the uniform buffer as well as the size of each chunk of data at a dynamic
// offset.
#[cfg(not(all(feature = "webgl", target_arch = "wasm32")))]
const MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE: u32 = 1024 * 1024;

// Cap of 4KB for WebGL2. Larger uniform buffers cause extremely long shader
// compilation times there: older shader compilers/GPUs that don't support
// dynamically indexing uniform buffers emulate it with large switch
// statements over buffer indices, and those take a long time to compile.
#[cfg(all(feature = "webgl", target_arch = "wasm32"))]
const MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE: u32 = 4 * 1024;
27+
28+
/// Similar to [`DynamicUniformBuffer`], except every N elements (depending on size)
29+
/// are grouped into a batch as an `array<T, N>` in WGSL.
30+
///
31+
/// This reduces the number of rebindings required due to having to pass dynamic
32+
/// offsets to bind group commands, and if indices into the array can be passed
33+
/// in via other means, it enables batching of draw commands.
34+
pub struct BatchedUniformBuffer<T: GpuArrayBufferable> {
35+
// Batches of fixed-size arrays of T are written to this buffer so that
36+
// each batch in a fixed-size array can be bound at a dynamic offset.
37+
uniforms: DynamicUniformBuffer<MaxCapacityArray<Vec<T>>>,
38+
// A batch of T are gathered into this `MaxCapacityArray` until it is full,
39+
// then it is written into the `DynamicUniformBuffer`, cleared, and new T
40+
// are gathered here, and so on for each batch.
41+
temp: MaxCapacityArray<Vec<T>>,
42+
current_offset: u32,
43+
dynamic_offset_alignment: u32,
44+
}
45+
46+
impl<T: GpuArrayBufferable> BatchedUniformBuffer<T> {
47+
pub fn batch_size(limits: &Limits) -> usize {
48+
(limits
49+
.max_uniform_buffer_binding_size
50+
.min(MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE) as u64
51+
/ T::min_size().get()) as usize
52+
}
53+
54+
pub fn new(limits: &Limits) -> Self {
55+
let capacity = Self::batch_size(limits);
56+
let alignment = limits.min_uniform_buffer_offset_alignment;
57+
58+
Self {
59+
uniforms: DynamicUniformBuffer::new_with_alignment(alignment as u64),
60+
temp: MaxCapacityArray(Vec::with_capacity(capacity), capacity),
61+
current_offset: 0,
62+
dynamic_offset_alignment: alignment,
63+
}
64+
}
65+
66+
#[inline]
67+
pub fn size(&self) -> NonZeroU64 {
68+
self.temp.size()
69+
}
70+
71+
pub fn clear(&mut self) {
72+
self.uniforms.clear();
73+
self.current_offset = 0;
74+
self.temp.0.clear();
75+
}
76+
77+
pub fn push(&mut self, component: T) -> GpuArrayBufferIndex<T> {
78+
let result = GpuArrayBufferIndex {
79+
index: self.temp.0.len() as u32,
80+
dynamic_offset: Some(self.current_offset),
81+
element_type: PhantomData,
82+
};
83+
self.temp.0.push(component);
84+
if self.temp.0.len() == self.temp.1 {
85+
self.flush();
86+
}
87+
result
88+
}
89+
90+
pub fn flush(&mut self) {
91+
self.uniforms.push(self.temp.clone());
92+
93+
self.current_offset +=
94+
align_to_next(self.temp.size().get(), self.dynamic_offset_alignment as u64) as u32;
95+
96+
self.temp.0.clear();
97+
}
98+
99+
pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
100+
if !self.temp.0.is_empty() {
101+
self.flush();
102+
}
103+
self.uniforms.write_buffer(device, queue);
104+
}
105+
106+
#[inline]
107+
pub fn binding(&self) -> Option<BindingResource> {
108+
let mut binding = self.uniforms.binding();
109+
if let Some(BindingResource::Buffer(binding)) = &mut binding {
110+
// MaxCapacityArray is runtime-sized so can't use T::min_size()
111+
binding.size = Some(self.size());
112+
}
113+
binding
114+
}
115+
}
116+
117+
/// Rounds `value` up to the nearest multiple of `alignment`.
///
/// `alignment` must be a non-zero power of two; this is checked in debug builds.
/// Values that are already a multiple of `alignment` are returned unchanged, and
/// that includes zero.
#[inline]
fn align_to_next(value: u64, alignment: u64) -> u64 {
    debug_assert!(alignment.is_power_of_two());
    // Adding `alignment - 1` moves any unaligned value past the next boundary, and
    // clearing the low bits snaps it back onto that boundary. Unlike the
    // `(value - 1) | mask` form, this does not underflow when `value` is zero.
    let mask = alignment - 1;
    (value + mask) & !mask
}
122+
123+
// ----------------------------------------------------------------------------
// MaxCapacityArray: a collection (field 0) paired with its maximum capacity
// (field 1).
//
// Implemented by Teodor Tanasoaia for encase. It was copied here because it
// was not yet included in an encase release, and it is unclear if it is the
// correct long-term solution for encase.

#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
struct MaxCapacityArray<T>(T, usize);
130+
131+
impl<T> ShaderType for MaxCapacityArray<T>
132+
where
133+
T: ShaderType<ExtraMetadata = ArrayMetadata>,
134+
{
135+
type ExtraMetadata = ArrayMetadata;
136+
137+
const METADATA: Metadata<Self::ExtraMetadata> = T::METADATA;
138+
139+
fn size(&self) -> ::core::num::NonZeroU64 {
140+
Self::METADATA.stride().mul(self.1.max(1) as u64).0
141+
}
142+
}
143+
144+
impl<T> WriteInto for MaxCapacityArray<T>
145+
where
146+
T: WriteInto + RuntimeSizedArray,
147+
{
148+
fn write_into<B: BufferMut>(&self, writer: &mut Writer<B>) {
149+
debug_assert!(self.0.len() <= self.1);
150+
self.0.write_into(writer);
151+
}
152+
}

crates/bevy_render/src/render_resource/buffer_vec.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@ use wgpu::BufferUsages;
2121
/// from system RAM to VRAM.
2222
///
2323
/// Other options for storing GPU-accessible data are:
24+
/// * [`StorageBuffer`](crate::render_resource::StorageBuffer)
2425
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
2526
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
2627
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
28+
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
2729
/// * [`BufferVec`](crate::render_resource::BufferVec)
2830
/// * [`Texture`](crate::render_resource::Texture)
2931
pub struct BufferVec<T: Pod> {
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
use super::StorageBuffer;
2+
use crate::{
3+
render_resource::batched_uniform_buffer::BatchedUniformBuffer,
4+
renderer::{RenderDevice, RenderQueue},
5+
};
6+
use bevy_ecs::{prelude::Component, system::Resource};
7+
use encase::{private::WriteInto, ShaderSize, ShaderType};
8+
use std::{marker::PhantomData, mem};
9+
use wgpu::{BindGroupLayoutEntry, BindingResource, BindingType, BufferBindingType, ShaderStages};
10+
11+
/// Trait for types able to go in a [`GpuArrayBuffer`].
12+
pub trait GpuArrayBufferable: ShaderType + ShaderSize + WriteInto + Clone {}
13+
impl<T: ShaderType + ShaderSize + WriteInto + Clone> GpuArrayBufferable for T {}
14+
15+
/// Stores an array of elements to be transferred to the GPU and made accessible to shaders as a read-only array.
16+
///
17+
/// On platforms that support storage buffers, this is equivalent to [`StorageBuffer<Vec<T>>`].
18+
/// Otherwise, this falls back to a dynamic offset uniform buffer with the largest
19+
/// array of T that fits within a uniform buffer binding (within reasonable limits).
20+
///
21+
/// Other options for storing GPU-accessible data are:
22+
/// * [`StorageBuffer`](crate::render_resource::StorageBuffer)
23+
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
24+
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
25+
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
26+
/// * [`BufferVec`](crate::render_resource::BufferVec)
27+
/// * [`Texture`](crate::render_resource::Texture)
28+
#[derive(Resource)]
29+
pub enum GpuArrayBuffer<T: GpuArrayBufferable> {
30+
Uniform(BatchedUniformBuffer<T>),
31+
Storage((StorageBuffer<Vec<T>>, Vec<T>)),
32+
}
33+
34+
impl<T: GpuArrayBufferable> GpuArrayBuffer<T> {
35+
pub fn new(device: &RenderDevice) -> Self {
36+
let limits = device.limits();
37+
if limits.max_storage_buffers_per_shader_stage == 0 {
38+
GpuArrayBuffer::Uniform(BatchedUniformBuffer::new(&limits))
39+
} else {
40+
GpuArrayBuffer::Storage((StorageBuffer::default(), Vec::new()))
41+
}
42+
}
43+
44+
pub fn clear(&mut self) {
45+
match self {
46+
GpuArrayBuffer::Uniform(buffer) => buffer.clear(),
47+
GpuArrayBuffer::Storage((_, buffer)) => buffer.clear(),
48+
}
49+
}
50+
51+
pub fn push(&mut self, value: T) -> GpuArrayBufferIndex<T> {
52+
match self {
53+
GpuArrayBuffer::Uniform(buffer) => buffer.push(value),
54+
GpuArrayBuffer::Storage((_, buffer)) => {
55+
let index = buffer.len() as u32;
56+
buffer.push(value);
57+
GpuArrayBufferIndex {
58+
index,
59+
dynamic_offset: None,
60+
element_type: PhantomData,
61+
}
62+
}
63+
}
64+
}
65+
66+
pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) {
67+
match self {
68+
GpuArrayBuffer::Uniform(buffer) => buffer.write_buffer(device, queue),
69+
GpuArrayBuffer::Storage((buffer, vec)) => {
70+
buffer.set(mem::take(vec));
71+
buffer.write_buffer(device, queue);
72+
}
73+
}
74+
}
75+
76+
pub fn binding_layout(
77+
binding: u32,
78+
visibility: ShaderStages,
79+
device: &RenderDevice,
80+
) -> BindGroupLayoutEntry {
81+
BindGroupLayoutEntry {
82+
binding,
83+
visibility,
84+
ty: if device.limits().max_storage_buffers_per_shader_stage == 0 {
85+
BindingType::Buffer {
86+
ty: BufferBindingType::Uniform,
87+
has_dynamic_offset: true,
88+
// BatchedUniformBuffer uses a MaxCapacityArray that is runtime-sized, so we use
89+
// None here and let wgpu figure out the size.
90+
min_binding_size: None,
91+
}
92+
} else {
93+
BindingType::Buffer {
94+
ty: BufferBindingType::Storage { read_only: true },
95+
has_dynamic_offset: false,
96+
min_binding_size: Some(T::min_size()),
97+
}
98+
},
99+
count: None,
100+
}
101+
}
102+
103+
pub fn binding(&self) -> Option<BindingResource> {
104+
match self {
105+
GpuArrayBuffer::Uniform(buffer) => buffer.binding(),
106+
GpuArrayBuffer::Storage((buffer, _)) => buffer.binding(),
107+
}
108+
}
109+
110+
pub fn batch_size(device: &RenderDevice) -> Option<u32> {
111+
let limits = device.limits();
112+
if limits.max_storage_buffers_per_shader_stage == 0 {
113+
Some(BatchedUniformBuffer::<T>::batch_size(&limits) as u32)
114+
} else {
115+
None
116+
}
117+
}
118+
}
119+
120+
/// An index into a [`GpuArrayBuffer`] for a given element.
121+
#[derive(Component)]
122+
pub struct GpuArrayBufferIndex<T: GpuArrayBufferable> {
123+
/// The index to use in a shader into the array.
124+
pub index: u32,
125+
/// The dynamic offset to use when setting the bind group in a pass.
126+
/// Only used on platforms that don't support storage buffers.
127+
pub dynamic_offset: Option<u32>,
128+
pub element_type: PhantomData<T>,
129+
}

crates/bevy_render/src/render_resource/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
mod batched_uniform_buffer;
12
mod bind_group;
23
mod bind_group_layout;
34
mod buffer;
45
mod buffer_vec;
6+
mod gpu_array_buffer;
57
mod pipeline;
68
mod pipeline_cache;
79
mod pipeline_specializer;
@@ -15,6 +17,7 @@ pub use bind_group::*;
1517
pub use bind_group_layout::*;
1618
pub use buffer::*;
1719
pub use buffer_vec::*;
20+
pub use gpu_array_buffer::*;
1821
pub use pipeline::*;
1922
pub use pipeline_cache::*;
2023
pub use pipeline_specializer::*;

crates/bevy_render/src/render_resource/storage_buffer.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use wgpu::{util::BufferInitDescriptor, BindingResource, BufferBinding, BufferUsa
2525
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer)
2626
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
2727
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
28+
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
2829
/// * [`BufferVec`](crate::render_resource::BufferVec)
2930
/// * [`Texture`](crate::render_resource::Texture)
3031
///
@@ -154,6 +155,7 @@ impl<T: ShaderType + WriteInto> StorageBuffer<T> {
154155
/// * [`StorageBuffer`](crate::render_resource::StorageBuffer)
155156
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer)
156157
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer)
158+
/// * [`GpuArrayBuffer`](crate::render_resource::GpuArrayBuffer)
157159
/// * [`BufferVec`](crate::render_resource::BufferVec)
158160
/// * [`Texture`](crate::render_resource::Texture)
159161
///

0 commit comments

Comments
 (0)