From 15685b550a3c97d63b4c071bae3cde37037b7719 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 23 Mar 2022 18:43:44 +0100 Subject: [PATCH 01/12] bevy_pbr: Fix and simplify scheduling of prepare_clusters system --- crates/bevy_pbr/src/lib.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/crates/bevy_pbr/src/lib.rs b/crates/bevy_pbr/src/lib.rs index 20d0aa2f5a979..3659a88c088ac 100644 --- a/crates/bevy_pbr/src/lib.rs +++ b/crates/bevy_pbr/src/lib.rs @@ -150,12 +150,10 @@ impl Plugin for PbrPlugin { ) .add_system_to_stage( RenderStage::Prepare, - // this is added as an exclusive system because it contributes new views. it must run (and have Commands applied) - // _before_ the `prepare_views()` system is run. ideally this becomes a normal system when "stageless" features come out - render::prepare_clusters - .exclusive_system() - .label(RenderLightSystems::PrepareClusters) - .after(RenderLightSystems::PrepareLights), + // NOTE: This needs to run after prepare_lights. As prepare_lights is an exclusive system, + // just adding it to the non-exclusive systems in the Prepare stage means it runs after + // prepare_lights. + render::prepare_clusters.label(RenderLightSystems::PrepareClusters), ) .add_system_to_stage( RenderStage::Queue, From 58a6229de9717cf34c4e58e478111c46ee89a21e Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 23 Mar 2022 18:49:25 +0100 Subject: [PATCH 02/12] Add RenderDevice to specialization key for adapter feature/limit configuration --- crates/bevy_pbr/src/material.rs | 15 ++++++++++++--- crates/bevy_pbr/src/pbr_material.rs | 5 ++++- crates/bevy_sprite/src/mesh2d/material.rs | 15 ++++++++++++--- examples/shader/shader_material_glsl.rs | 2 +- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs index 5e0648adcd43a..f82647832befa 100644 --- a/crates/bevy_pbr/src/material.rs +++ b/crates/bevy_pbr/src/material.rs @@ -89,7 +89,11 @@ impl SpecializedMaterial for M { type Key = (); #[inline] - fn key(_material: &::PreparedAsset) -> Self::Key {} + fn key( + _render_device: &RenderDevice, + _material: &::PreparedAsset, + ) -> Self::Key { + } #[inline] fn specialize( @@ -144,7 +148,10 @@ pub trait SpecializedMaterial: Asset + RenderAsset { /// Extract the [`SpecializedMaterial::Key`] for the "prepared" version of this material. This key will be /// passed in to the [`SpecializedMaterial::specialize`] function when compiling the [`RenderPipeline`](bevy_render::render_resource::RenderPipeline) /// for a given entity's material. - fn key(material: &::PreparedAsset) -> Self::Key; + fn key( + render_device: &RenderDevice, + material: &::PreparedAsset, + ) -> Self::Key; /// Specializes the given `descriptor` according to the given `key`. fn specialize( @@ -308,6 +315,7 @@ pub fn queue_material_meshes( material_pipeline: Res>, mut pipelines: ResMut>>, mut pipeline_cache: ResMut, + render_device: Res, msaa: Res, render_meshes: Res>, render_materials: Res>, @@ -320,6 +328,7 @@ pub fn queue_material_meshes( &mut RenderPhase, )>, ) { + let render_device = render_device.into_inner(); for (view, visible_entities, mut opaque_phase, mut alpha_mask_phase, mut transparent_phase) in views.iter_mut() { @@ -354,7 +363,7 @@ pub fn queue_material_meshes( mesh_key |= MeshPipelineKey::TRANSPARENT_MAIN_PASS; } - let material_key = M::key(material); + let material_key = M::key(render_device, material); let pipeline_id = pipelines.specialize( &mut pipeline_cache, diff --git a/crates/bevy_pbr/src/pbr_material.rs b/crates/bevy_pbr/src/pbr_material.rs index a9dccbdc3b960..6cf5d90b8400d 100644 --- a/crates/bevy_pbr/src/pbr_material.rs +++ b/crates/bevy_pbr/src/pbr_material.rs @@ -362,7 +362,10 @@ pub struct StandardMaterialKey { impl SpecializedMaterial for StandardMaterial { type Key = StandardMaterialKey; - fn key(render_asset: &::PreparedAsset) -> Self::Key { + fn key( + render_device: &RenderDevice, + render_asset: &::PreparedAsset, + ) -> Self::Key { StandardMaterialKey { normal_map: render_asset.has_normal_map, cull_mode: render_asset.cull_mode, diff --git a/crates/bevy_sprite/src/mesh2d/material.rs b/crates/bevy_sprite/src/mesh2d/material.rs index 6a900c4604d50..44fcbfbf558e2 100644 --- a/crates/bevy_sprite/src/mesh2d/material.rs +++ b/crates/bevy_sprite/src/mesh2d/material.rs @@ -86,7 +86,11 @@ impl SpecializedMaterial2d for M { type Key = (); #[inline] - fn key(_material: &::PreparedAsset) -> Self::Key {} + fn key( + _render_device: &RenderDevice, + _material: &::PreparedAsset, + ) -> Self::Key { + } #[inline] fn specialize( @@ -136,7 +140,10 @@ pub trait SpecializedMaterial2d: Asset + RenderAsset { /// Extract the [`SpecializedMaterial2d::Key`] for the "prepared" version of this material. This key will be /// passed in to the [`SpecializedMaterial2d::specialize`] function when compiling the [`RenderPipeline`](bevy_render::render_resource::RenderPipeline) /// for a given entity's material. - fn key(material: &::PreparedAsset) -> Self::Key; + fn key( + render_device: &RenderDevice, + material: &::PreparedAsset, + ) -> Self::Key; /// Specializes the given `descriptor` according to the given `key`. fn specialize( @@ -292,6 +299,7 @@ pub fn queue_material2d_meshes( material2d_pipeline: Res>, mut pipelines: ResMut>>, mut pipeline_cache: ResMut, + render_device: Res, msaa: Res, render_meshes: Res>, render_materials: Res>, @@ -301,6 +309,7 @@ pub fn queue_material2d_meshes( if material2d_meshes.is_empty() { return; } + let render_device = render_device.into_inner(); for (visible_entities, mut transparent_phase) in views.iter_mut() { let draw_transparent_pbr = transparent_draw_functions .read() @@ -318,7 +327,7 @@ pub fn queue_material2d_meshes( let mesh_key = msaa_key | Mesh2dPipelineKey::from_primitive_topology(mesh.primitive_topology); - let material_key = M::key(material2d); + let material_key = M::key(render_device, material2d); let pipeline_id = pipelines.specialize( &mut pipeline_cache, &material2d_pipeline, diff --git a/examples/shader/shader_material_glsl.rs b/examples/shader/shader_material_glsl.rs index 05e8a659fe4b3..454c9814e474a 100644 --- a/examples/shader/shader_material_glsl.rs +++ b/examples/shader/shader_material_glsl.rs @@ -94,7 +94,7 @@ impl RenderAsset for CustomMaterial { impl SpecializedMaterial for CustomMaterial { type Key = (); - fn key(_: &::PreparedAsset) -> Self::Key {} + fn key(_: &RenderDevice, _: &::PreparedAsset) -> Self::Key {} fn specialize( descriptor: &mut RenderPipelineDescriptor, From a053119d1e313354eba5ec6ff82c851604a55741 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 23 Mar 2022 18:51:24 +0100 Subject: [PATCH 03/12] bevy_pbr: Use storage buffers for point lights if supported --- crates/bevy_pbr/src/lib.rs | 12 +- crates/bevy_pbr/src/light.rs | 7 +- crates/bevy_pbr/src/pbr_material.rs | 12 + crates/bevy_pbr/src/render/light.rs | 349 ++++++++++++++---- crates/bevy_pbr/src/render/mesh.rs | 45 +-- .../src/render/mesh_view_bind_group.wgsl | 24 +- crates/bevy_pbr/src/render/pbr.wgsl | 29 +- .../src/render_resource/storage_buffer.rs | 10 + 8 files changed, 382 insertions(+), 106 deletions(-) diff --git a/crates/bevy_pbr/src/lib.rs b/crates/bevy_pbr/src/lib.rs index 3659a88c088ac..2aa3889a4bee5 100644 --- a/crates/bevy_pbr/src/lib.rs +++ b/crates/bevy_pbr/src/lib.rs @@ -41,6 +41,7 @@ use bevy_render::{ render_graph::RenderGraph, render_phase::{sort_phase_system, AddRenderCommand, DrawFunctions}, render_resource::{Shader, SpecializedMeshPipelines}, + renderer::RenderDevice, view::VisibilitySystems, RenderApp, RenderStage, }; @@ -126,6 +127,15 @@ impl Plugin for PbrPlugin { }, ); + // NOTE: 3 storage buffer bindings are needed for clustered-forward rendering so check + // that at least that many are supported + let use_storage_buffers = app + .world + .resource::() + .limits() + .max_storage_buffers_per_shader_stage + >= 3; + let render_app = match app.get_sub_app_mut(RenderApp) { Ok(render_app) => render_app, Err(_) => return, @@ -164,7 +174,7 @@ impl Plugin for PbrPlugin { .init_resource::() .init_resource::>() .init_resource::() - .init_resource::() + .insert_resource(GlobalLightMeta::new(use_storage_buffers)) .init_resource::>(); let shadow_pass_node = ShadowPassNode::new(&mut render_app.world); diff --git a/crates/bevy_pbr/src/light.rs b/crates/bevy_pbr/src/light.rs index 1258c944de24a..e2268d3ca2816 100644 --- a/crates/bevy_pbr/src/light.rs +++ b/crates/bevy_pbr/src/light.rs @@ -9,6 +9,7 @@ use bevy_render::{ color::Color, prelude::Image, primitives::{Aabb, CubemapFrusta, Frustum, Sphere}, + renderer::RenderDevice, view::{ComputedVisibility, RenderLayers, Visibility, VisibleEntities}, }; use bevy_transform::components::GlobalTransform; @@ -709,6 +710,7 @@ pub(crate) fn assign_lights_to_clusters( lights_query: Query<(Entity, &GlobalTransform, &PointLight, &Visibility)>, mut lights: Local>, mut max_point_lights_warning_emitted: Local, + render_device: Res, ) { global_lights.entities.clear(); lights.clear(); @@ -727,7 +729,10 @@ pub(crate) fn assign_lights_to_clusters( ), ); - if lights.len() > MAX_POINT_LIGHTS { + // NOTE: Clustered-forward rendering requires 3 buffer bindings so only use storage buffers + // if at least 3 are supported + let use_storage_buffers = render_device.limits().max_storage_buffers_per_shader_stage >= 3; + if !use_storage_buffers && lights.len() > MAX_POINT_LIGHTS { lights.sort_by(|light_1, light_2| { point_light_order( (&light_1.entity, &light_1.shadows_enabled), diff --git a/crates/bevy_pbr/src/pbr_material.rs b/crates/bevy_pbr/src/pbr_material.rs index 6cf5d90b8400d..413b89486f862 100644 --- a/crates/bevy_pbr/src/pbr_material.rs +++ b/crates/bevy_pbr/src/pbr_material.rs @@ -357,6 +357,7 @@ impl RenderAsset for StandardMaterial { pub struct StandardMaterialKey { normal_map: bool, cull_mode: Option, + use_storage_buffers: bool, } impl SpecializedMaterial for StandardMaterial { @@ -369,6 +370,9 @@ impl SpecializedMaterial for StandardMaterial { StandardMaterialKey { normal_map: render_asset.has_normal_map, cull_mode: render_asset.cull_mode, + // NOTE: Clustered-forward rendering requires 3 storage buffer bindings so check that + // at least that many are supported. + use_storage_buffers: render_device.limits().max_storage_buffers_per_shader_stage >= 3, } } @@ -386,6 +390,14 @@ impl SpecializedMaterial for StandardMaterial { .push(String::from("STANDARDMATERIAL_NORMAL_MAP")); } descriptor.primitive.cull_mode = key.cull_mode; + if !key.use_storage_buffers { + descriptor + .fragment + .as_mut() + .unwrap() + .shader_defs + .push(String::from("NO_STORAGE_BUFFERS_SUPPORT")); + } if let Some(label) = &mut descriptor.label { *label = format!("pbr_{}", *label).into(); } diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs index fbffe9cd021cd..a64d8cfb04c35 100644 --- a/crates/bevy_pbr/src/render/light.rs +++ b/crates/bevy_pbr/src/render/light.rs @@ -10,7 +10,7 @@ use bevy_ecs::{ prelude::*, system::{lifetimeless::*, SystemParamItem}, }; -use bevy_math::{const_vec3, Mat4, UVec3, UVec4, Vec2, Vec3, Vec4, Vec4Swizzles}; +use bevy_math::{const_vec3, Mat4, UVec2, UVec3, UVec4, Vec2, Vec3, Vec4, Vec4Swizzles}; use bevy_render::{ camera::{Camera, CameraProjection}, color::Color, @@ -22,7 +22,7 @@ use bevy_render::{ EntityRenderCommand, PhaseItem, RenderCommandResult, RenderPhase, SetItemPipeline, TrackedRenderPass, }, - render_resource::{std140::AsStd140, *}, + render_resource::{std140::AsStd140, std430::AsStd430, *}, renderer::{RenderContext, RenderDevice, RenderQueue}, texture::*, view::{ @@ -81,7 +81,7 @@ pub struct ExtractedDirectionalLight { pub type ExtractedDirectionalLightShadowMap = DirectionalLightShadowMap; #[repr(C)] -#[derive(Copy, Clone, AsStd140, Default, Debug)] +#[derive(Copy, Clone, AsStd140, AsStd430, Default, Debug)] pub struct GpuPointLight { // The lower-right 2x2 values of the projection matrix 22 23 32 33 projection_lr: Vec4, @@ -92,9 +92,87 @@ pub struct GpuPointLight { shadow_normal_bias: f32, } -#[derive(AsStd140)] -pub struct GpuPointLights { - data: [GpuPointLight; MAX_POINT_LIGHTS], +pub enum GpuPointLights { + Uniform { + buffer: UniformVec<[GpuPointLight; MAX_POINT_LIGHTS]>, + }, + Storage { + buffer: StorageBuffer, + }, +} + +impl GpuPointLights { + fn new(use_storage_buffers: bool) -> Self { + if use_storage_buffers { + Self::storage() + } else { + Self::uniform() + } + } + + fn uniform() -> Self { + Self::Uniform { + buffer: UniformVec::default(), + } + } + + fn storage() -> Self { + Self::Storage { + buffer: StorageBuffer::default(), + } + } + + fn clear(&mut self) { + match self { + GpuPointLights::Uniform { buffer } => buffer.clear(), + GpuPointLights::Storage { buffer } => buffer.clear(), + } + } + + fn push(&mut self, mut lights: Vec) { + match self { + GpuPointLights::Uniform { buffer } => { + // NOTE: This iterator construction allows moving and padding with default + // values and is like this to avoid unnecessary cloning. + let gpu_point_lights = lights + .drain(..) + .chain(std::iter::repeat_with(GpuPointLight::default)) + .take(MAX_POINT_LIGHTS) + .collect::>(); + buffer.push(gpu_point_lights.try_into().unwrap()); + } + GpuPointLights::Storage { buffer } => { + for light in lights.drain(..) { + buffer.push(light); + } + } + } + } + + fn write_buffer(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) { + match self { + GpuPointLights::Uniform { buffer } => buffer.write_buffer(render_device, render_queue), + GpuPointLights::Storage { buffer } => buffer.write_buffer(render_device, render_queue), + } + } + + pub fn binding(&self) -> Option { + match self { + GpuPointLights::Uniform { buffer } => buffer.binding(), + GpuPointLights::Storage { buffer } => buffer.binding(), + } + } + + pub fn len(&self) -> usize { + match self { + GpuPointLights::Uniform { buffer } => buffer.len(), + GpuPointLights::Storage { buffer } => buffer.values().len(), + } + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } } // NOTE: These must match the bit flags in bevy_pbr2/src/render/pbr.frag! @@ -352,7 +430,6 @@ pub fn extract_lights( point_light_shadow_map: Res, directional_light_shadow_map: Res, global_point_lights: Res, - // visible_point_lights: Query<&VisiblePointLights>, mut point_lights: Query<(&PointLight, &mut CubemapVisibleEntities, &GlobalTransform)>, mut directional_lights: Query<( Entity, @@ -361,6 +438,7 @@ pub fn extract_lights( &GlobalTransform, &Visibility, )>, + mut previous_point_lights_len: Local, ) { commands.insert_resource(ExtractedAmbientLight { color: ambient_light.color, @@ -379,32 +457,38 @@ pub fn extract_lights( // https://catlikecoding.com/unity/tutorials/custom-srp/point-and-spot-shadows/ let point_light_texel_size = 2.0 / point_light_shadow_map.size as f32; + let mut point_lights_values = Vec::with_capacity(*previous_point_lights_len); for entity in global_point_lights.iter().copied() { if let Ok((point_light, cubemap_visible_entities, transform)) = point_lights.get_mut(entity) { let render_cubemap_visible_entities = std::mem::take(cubemap_visible_entities.into_inner()); - commands.get_or_spawn(entity).insert_bundle(( - ExtractedPointLight { - color: point_light.color, - // NOTE: Map from luminous power in lumens to luminous intensity in lumens per steradian - // for a point light. See https://google.github.io/filament/Filament.html#mjx-eqn-pointLightLuminousPower - // for details. - intensity: point_light.intensity / (4.0 * std::f32::consts::PI), - range: point_light.range, - radius: point_light.radius, - transform: *transform, - shadows_enabled: point_light.shadows_enabled, - shadow_depth_bias: point_light.shadow_depth_bias, - // The factor of SQRT_2 is for the worst-case diagonal offset - shadow_normal_bias: point_light.shadow_normal_bias - * point_light_texel_size - * std::f32::consts::SQRT_2, - }, - render_cubemap_visible_entities, + point_lights_values.push(( + entity, + ( + ExtractedPointLight { + color: point_light.color, + // NOTE: Map from luminous power in lumens to luminous intensity in lumens per steradian + // for a point light. See https://google.github.io/filament/Filament.html#mjx-eqn-pointLightLuminousPower + // for details. + intensity: point_light.intensity / (4.0 * std::f32::consts::PI), + range: point_light.range, + radius: point_light.radius, + transform: *transform, + shadows_enabled: point_light.shadows_enabled, + shadow_depth_bias: point_light.shadow_depth_bias, + // The factor of SQRT_2 is for the worst-case diagonal offset + shadow_normal_bias: point_light.shadow_normal_bias + * point_light_texel_size + * std::f32::consts::SQRT_2, + }, + render_cubemap_visible_entities, + ), )); } } + *previous_point_lights_len = point_lights_values.len(); + commands.insert_or_spawn_batch(point_lights_values); for (entity, directional_light, visible_entities, transform, visibility) in directional_lights.iter_mut() @@ -528,12 +612,20 @@ pub struct ViewLightsUniformOffset { pub offset: u32, } -#[derive(Default)] pub struct GlobalLightMeta { - pub gpu_point_lights: UniformVec, + pub gpu_point_lights: GpuPointLights, pub entity_to_index: HashMap, } +impl GlobalLightMeta { + pub fn new(use_storage_buffers: bool) -> Self { + Self { + gpu_point_lights: GpuPointLights::new(use_storage_buffers), + entity_to_index: HashMap::default(), + } + } +} + #[derive(Default)] pub struct LightMeta { pub view_gpu_lights: DynamicUniformVec, @@ -615,14 +707,14 @@ pub fn prepare_lights( .reserve(point_lights.len()); } - let mut gpu_point_lights = [GpuPointLight::default(); MAX_POINT_LIGHTS]; + let mut gpu_point_lights = Vec::new(); for (index, &(entity, light)) in point_lights.iter().enumerate() { let mut flags = PointLightFlags::NONE; // Lights are sorted, shadow enabled lights are first if light.shadows_enabled && index < MAX_POINT_LIGHT_SHADOW_MAPS { flags |= PointLightFlags::SHADOWS_ENABLED; } - gpu_point_lights[index] = GpuPointLight { + gpu_point_lights.push(GpuPointLight { projection_lr: Vec4::new( cube_face_projection.z_axis.z, cube_face_projection.z_axis.w, @@ -639,12 +731,10 @@ pub fn prepare_lights( flags: flags.bits, shadow_depth_bias: light.shadow_depth_bias, shadow_normal_bias: light.shadow_normal_bias, - }; + }); global_light_meta.entity_to_index.insert(entity, index); } - global_light_meta.gpu_point_lights.push(GpuPointLights { - data: gpu_point_lights, - }); + global_light_meta.gpu_point_lights.push(gpu_point_lights); global_light_meta .gpu_point_lights .write_buffer(&render_device, &render_queue); @@ -931,14 +1021,48 @@ fn pack_offset_and_count(offset: usize, count: usize) -> u32 { | (count as u32 & CLUSTER_COUNT_MASK) } -#[derive(Component, Default)] +enum ViewClusterBuffers { + Uniform { + // NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment + cluster_light_index_lists: UniformVec<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>, + // NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment + cluster_offsets_and_counts: UniformVec<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>, + }, + Storage { + cluster_light_index_lists: StorageBuffer, + cluster_offsets_and_counts: StorageBuffer, + }, +} + +impl ViewClusterBuffers { + fn new(use_storage_buffers: bool) -> Self { + if use_storage_buffers { + Self::storage() + } else { + Self::uniform() + } + } + + fn uniform() -> Self { + ViewClusterBuffers::Uniform { + cluster_light_index_lists: UniformVec::default(), + cluster_offsets_and_counts: UniformVec::default(), + } + } + + fn storage() -> Self { + ViewClusterBuffers::Storage { + cluster_light_index_lists: StorageBuffer::default(), + cluster_offsets_and_counts: StorageBuffer::default(), + } + } +} + +#[derive(Component)] pub struct ViewClusterBindings { n_indices: usize, - // NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment - pub cluster_light_index_lists: UniformVec<[UVec4; Self::MAX_UNIFORM_ITEMS]>, n_offsets: usize, - // NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment - pub cluster_offsets_and_counts: UniformVec<[UVec4; Self::MAX_UNIFORM_ITEMS]>, + buffers: ViewClusterBuffers, } impl ViewClusterBindings { @@ -946,25 +1070,59 @@ impl ViewClusterBindings { const MAX_UNIFORM_ITEMS: usize = Self::MAX_OFFSETS / 4; pub const MAX_INDICES: usize = 16384; + pub fn new(use_storage_buffers: bool) -> Self { + Self { + n_indices: 0, + n_offsets: 0, + buffers: ViewClusterBuffers::new(use_storage_buffers), + } + } + pub fn reserve_and_clear(&mut self) { - self.cluster_light_index_lists.clear(); - self.cluster_light_index_lists - .push([UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]); - self.cluster_offsets_and_counts.clear(); - self.cluster_offsets_and_counts - .push([UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]); + match &mut self.buffers { + ViewClusterBuffers::Uniform { + cluster_light_index_lists, + cluster_offsets_and_counts, + } => { + cluster_light_index_lists.clear(); + cluster_light_index_lists.push([UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]); + cluster_offsets_and_counts.clear(); + cluster_offsets_and_counts.push([UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]); + } + ViewClusterBuffers::Storage { + cluster_light_index_lists, + cluster_offsets_and_counts, + .. + } => { + cluster_light_index_lists.clear(); + cluster_offsets_and_counts.clear(); + } + } } pub fn push_offset_and_count(&mut self, offset: usize, count: usize) { - let array_index = self.n_offsets >> 2; // >> 2 is equivalent to / 4 - if array_index >= Self::MAX_UNIFORM_ITEMS { - warn!("cluster offset and count out of bounds!"); - return; - } - let component = self.n_offsets & ((1 << 2) - 1); - let packed = pack_offset_and_count(offset, count); + match &mut self.buffers { + ViewClusterBuffers::Uniform { + cluster_offsets_and_counts, + .. + } => { + let array_index = self.n_offsets >> 2; // >> 2 is equivalent to / 4 + if array_index >= Self::MAX_UNIFORM_ITEMS { + warn!("cluster offset and count out of bounds!"); + return; + } + let component = self.n_offsets & ((1 << 2) - 1); + let packed = pack_offset_and_count(offset, count); - self.cluster_offsets_and_counts.get_mut(0)[array_index][component] = packed; + cluster_offsets_and_counts.get_mut(0)[array_index][component] = packed; + } + ViewClusterBuffers::Storage { + cluster_offsets_and_counts, + .. + } => { + cluster_offsets_and_counts.push(UVec2::new(offset as u32, count as u32)); + } + } self.n_offsets += 1; } @@ -974,16 +1132,74 @@ impl ViewClusterBindings { } pub fn push_index(&mut self, index: usize) { - let array_index = self.n_indices >> 4; // >> 4 is equivalent to / 16 - let component = (self.n_indices >> 2) & ((1 << 2) - 1); - let sub_index = self.n_indices & ((1 << 2) - 1); - let index = index as u32 & POINT_LIGHT_INDEX_MASK; - - self.cluster_light_index_lists.get_mut(0)[array_index][component] |= - index << (8 * sub_index); + match &mut self.buffers { + ViewClusterBuffers::Uniform { + cluster_light_index_lists, + .. + } => { + let array_index = self.n_indices >> 4; // >> 4 is equivalent to / 16 + let component = (self.n_indices >> 2) & ((1 << 2) - 1); + let sub_index = self.n_indices & ((1 << 2) - 1); + let index = index as u32 & POINT_LIGHT_INDEX_MASK; + + cluster_light_index_lists.get_mut(0)[array_index][component] |= + index << (8 * sub_index); + } + ViewClusterBuffers::Storage { + cluster_light_index_lists, + .. + } => { + cluster_light_index_lists.push(index as u32); + } + } self.n_indices += 1; } + + pub fn write_buffers(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) { + match &mut self.buffers { + ViewClusterBuffers::Uniform { + cluster_light_index_lists, + cluster_offsets_and_counts, + } => { + cluster_light_index_lists.write_buffer(render_device, render_queue); + cluster_offsets_and_counts.write_buffer(render_device, render_queue); + } + ViewClusterBuffers::Storage { + cluster_light_index_lists, + cluster_offsets_and_counts, + } => { + cluster_light_index_lists.write_buffer(render_device, render_queue); + cluster_offsets_and_counts.write_buffer(render_device, render_queue); + } + } + } + + pub fn light_index_lists_binding(&self) -> Option { + match &self.buffers { + ViewClusterBuffers::Uniform { + cluster_light_index_lists, + .. + } => cluster_light_index_lists.binding(), + ViewClusterBuffers::Storage { + cluster_light_index_lists, + .. + } => cluster_light_index_lists.binding(), + } + } + + pub fn offsets_and_counts_binding(&self) -> Option { + match &self.buffers { + ViewClusterBuffers::Uniform { + cluster_offsets_and_counts, + .. + } => cluster_offsets_and_counts.binding(), + ViewClusterBuffers::Storage { + cluster_offsets_and_counts, + .. + } => cluster_offsets_and_counts.binding(), + } + } } pub fn prepare_clusters( @@ -1000,8 +1216,9 @@ pub fn prepare_clusters( With>, >, ) { + let use_storage_buffers = render_device.limits().max_storage_buffers_per_shader_stage >= 3; for (entity, cluster_config, extracted_clusters) in views.iter() { - let mut view_clusters_bindings = ViewClusterBindings::default(); + let mut view_clusters_bindings = ViewClusterBindings::new(use_storage_buffers); view_clusters_bindings.reserve_and_clear(); let mut indices_full = false; @@ -1019,8 +1236,9 @@ pub fn prepare_clusters( for entity in cluster_lights.iter() { if let Some(light_index) = global_light_meta.entity_to_index.get(entity) { - if view_clusters_bindings.n_indices() - >= ViewClusterBindings::MAX_INDICES + if !use_storage_buffers + && view_clusters_bindings.n_indices() + >= ViewClusterBindings::MAX_INDICES { warn!("Cluster light index lists is full! The PointLights in the view are affecting too many clusters."); indices_full = true; @@ -1036,12 +1254,7 @@ pub fn prepare_clusters( } } - view_clusters_bindings - .cluster_light_index_lists - .write_buffer(&render_device, &render_queue); - view_clusters_bindings - .cluster_offsets_and_counts - .write_buffer(&render_device, &render_queue); + view_clusters_bindings.write_buffers(&render_device, &render_queue); commands.get_or_spawn(entity).insert(view_clusters_bindings); } diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index 08ac42b83865d..357e00e3bcb56 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -263,6 +263,12 @@ pub struct MeshPipeline { impl FromWorld for MeshPipeline { fn from_world(world: &mut World) -> Self { let render_device = world.resource::(); + let (cluster_buffer_binding_type, cluster_min_binding_size) = + if render_device.limits().max_storage_buffers_per_shader_stage >= 3 { + (BufferBindingType::Storage { read_only: true }, None) + } else { + (BufferBindingType::Uniform, BufferSize::new(16384)) + }; let view_layout = render_device.create_bind_group_layout(&BindGroupLayoutDescriptor { entries: &[ // View @@ -334,11 +340,12 @@ impl FromWorld for MeshPipeline { binding: 6, visibility: ShaderStages::FRAGMENT, ty: BindingType::Buffer { - ty: BufferBindingType::Uniform, + ty: cluster_buffer_binding_type, has_dynamic_offset: false, - // NOTE: Static size for uniform buffers. GpuPointLight has a padded - // size of 64 bytes, so 16384 / 64 = 256 point lights max - min_binding_size: BufferSize::new(16384), + // NOTE (when no storage buffers): Static size for uniform buffers. + // GpuPointLight has a padded size of 64 bytes, so 16384 / 64 = 256 + // point lights max + min_binding_size: cluster_min_binding_size, }, count: None, }, @@ -347,10 +354,11 @@ impl FromWorld for MeshPipeline { binding: 7, visibility: ShaderStages::FRAGMENT, ty: BindingType::Buffer { - ty: BufferBindingType::Uniform, + ty: cluster_buffer_binding_type, has_dynamic_offset: false, - // NOTE: With 256 point lights max, indices need 8 bits so use u8 - min_binding_size: BufferSize::new(16384), + // NOTE (when no storage buffers): With 256 point lights max, indices + // need 8 bits so use u8 + min_binding_size: cluster_min_binding_size, }, count: None, }, @@ -359,13 +367,14 @@ impl FromWorld for MeshPipeline { binding: 8, visibility: ShaderStages::FRAGMENT, ty: BindingType::Buffer { - ty: BufferBindingType::Uniform, + ty: cluster_buffer_binding_type, has_dynamic_offset: false, - // NOTE: The offset needs to address 16384 indices, which needs 14 bits. - // The count can be at most all 256 lights so 8 bits. - // Pack the offset into the upper 24 bits and the count into the - // lower 8 bits. - min_binding_size: BufferSize::new(16384), + // NOTE (when no storage buffers): The offset needs to address 16384 + // indices, which needs 14 bits. The count can be at most all 256 lights + // so 8 bits. + // NOTE: Pack the offset into the upper 19 bits and the count into the + // lower 13 bits. + min_binding_size: cluster_min_binding_size, }, count: None, }, @@ -770,17 +779,11 @@ pub fn queue_mesh_view_bind_groups( }, BindGroupEntry { binding: 7, - resource: view_cluster_bindings - .cluster_light_index_lists - .binding() - .unwrap(), + resource: view_cluster_bindings.light_index_lists_binding().unwrap(), }, BindGroupEntry { binding: 8, - resource: view_cluster_bindings - .cluster_offsets_and_counts - .binding() - .unwrap(), + resource: view_cluster_bindings.offsets_and_counts_binding().unwrap(), }, ], label: Some("mesh_view_bind_group"), diff --git a/crates/bevy_pbr/src/render/mesh_view_bind_group.wgsl b/crates/bevy_pbr/src/render/mesh_view_bind_group.wgsl index fd1d1dec4d79d..581334676f2cd 100644 --- a/crates/bevy_pbr/src/render/mesh_view_bind_group.wgsl +++ b/crates/bevy_pbr/src/render/mesh_view_bind_group.wgsl @@ -57,20 +57,30 @@ struct Lights { n_directional_lights: u32; }; +#ifdef NO_STORAGE_BUFFERS_SUPPORT struct PointLights { data: array; }; - struct ClusterLightIndexLists { // each u32 contains 4 u8 indices into the PointLights array data: array, 1024u>; }; - struct ClusterOffsetsAndCounts { // each u32 contains a 24-bit index into ClusterLightIndexLists in the high 24 bits // and an 8-bit count of the number of lights in the low 8 bits data: array, 1024u>; }; +#else +struct PointLights { + data: array; +}; +struct ClusterLightIndexLists { + data: array; +}; +struct ClusterOffsetsAndCounts { + data: array>; +}; +#endif [[group(0), binding(0)]] var view: View; @@ -94,9 +104,19 @@ var directional_shadow_textures: texture_depth_2d_array; #endif [[group(0), binding(5)]] var directional_shadow_textures_sampler: sampler_comparison; + +#ifdef NO_STORAGE_BUFFERS_SUPPORT [[group(0), binding(6)]] var point_lights: PointLights; [[group(0), binding(7)]] var cluster_light_index_lists: ClusterLightIndexLists; [[group(0), binding(8)]] var cluster_offsets_and_counts: ClusterOffsetsAndCounts; +#else +[[group(0), binding(6)]] +var point_lights: PointLights; +[[group(0), binding(7)]] +var cluster_light_index_lists: ClusterLightIndexLists; +[[group(0), binding(8)]] +var cluster_offsets_and_counts: ClusterOffsetsAndCounts; +#endif diff --git a/crates/bevy_pbr/src/render/pbr.wgsl b/crates/bevy_pbr/src/render/pbr.wgsl index 1e1e69f02a235..11be197584eca 100644 --- a/crates/bevy_pbr/src/render/pbr.wgsl +++ b/crates/bevy_pbr/src/render/pbr.wgsl @@ -263,29 +263,32 @@ fn fragment_cluster_index(frag_coord: vec2, view_z: f32, is_orthographic: b ); } -struct ClusterOffsetAndCount { - offset: u32; - count: u32; -}; - // this must match CLUSTER_COUNT_SIZE in light.rs let CLUSTER_COUNT_SIZE = 13u; -fn unpack_offset_and_count(cluster_index: u32) -> ClusterOffsetAndCount { +fn unpack_offset_and_count(cluster_index: u32) -> vec2 { +#ifdef NO_STORAGE_BUFFERS_SUPPORT let offset_and_count = cluster_offsets_and_counts.data[cluster_index >> 2u][cluster_index & ((1u << 2u) - 1u)]; - var output: ClusterOffsetAndCount; - // The offset is stored in the upper 24 bits - output.offset = (offset_and_count >> CLUSTER_COUNT_SIZE) & ((1u << 32u - CLUSTER_COUNT_SIZE) - 1u); - // The count is stored in the lower 8 bits - output.count = offset_and_count & ((1u << CLUSTER_COUNT_SIZE) - 1u); - return output; + return vec2( + // The offset is stored in the upper 32 - CLUSTER_COUNT_SIZE = 19 bits + (offset_and_count >> CLUSTER_COUNT_SIZE) & ((1u << 32u - CLUSTER_COUNT_SIZE) - 1u), + // The count is stored in the lower CLUSTER_COUNT_SIZE = 13 bits + offset_and_count & ((1u << CLUSTER_COUNT_SIZE) - 1u) + ); +#else + return cluster_offsets_and_counts.data[cluster_index]; +#endif } fn get_light_id(index: u32) -> u32 { +#ifdef NO_STORAGE_BUFFERS_SUPPORT // The index is correct but in cluster_light_index_lists we pack 4 u8s into a u32 // This means the index into cluster_light_index_lists is index / 4 let indices = cluster_light_index_lists.data[index >> 4u][(index >> 2u) & ((1u << 2u) - 1u)]; // And index % 4 gives the sub-index of the u8 within the u32 so we shift by 8 * sub-index return (indices >> (8u * (index & ((1u << 2u) - 1u)))) & ((1u << 8u) - 1u); +#else + return cluster_light_index_lists.data[index]; +#endif } fn point_light( @@ -578,7 +581,7 @@ fn fragment(in: FragmentInput) -> [[location(0)]] vec4 { ), in.world_position); let cluster_index = fragment_cluster_index(in.frag_coord.xy, view_z, is_orthographic); let offset_and_count = unpack_offset_and_count(cluster_index); - for (var i: u32 = offset_and_count.offset; i < offset_and_count.offset + offset_and_count.count; i = i + 1u) { + for (var i: u32 = offset_and_count[0]; i < offset_and_count[0] + offset_and_count[1]; i = i + 1u) { let light_id = get_light_id(i); let light = point_lights.data[light_id]; var shadow: f32 = 1.0; diff --git a/crates/bevy_render/src/render_resource/storage_buffer.rs b/crates/bevy_render/src/render_resource/storage_buffer.rs index 874f1527a1540..5921cff7893e1 100644 --- a/crates/bevy_render/src/render_resource/storage_buffer.rs +++ b/crates/bevy_render/src/render_resource/storage_buffer.rs @@ -126,4 +126,14 @@ impl StorageBuffer { pub fn values_mut(&mut self) -> &mut [T] { &mut self.values } + + #[inline] + pub fn clear(&mut self) { + self.values.clear(); + } + + #[inline] + pub fn push(&mut self, value: T) { + self.values.push(value); + } } From cb284ac88d5c7f34cc8dc4a4ba1ddaa9022a6e67 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Wed, 23 Mar 2022 18:52:59 +0100 Subject: [PATCH 04/12] examples: Add many_lights example for testing many point lights --- Cargo.toml | 4 + examples/3d/many_lights.rs | 166 +++++++++++++++++++++++++++++++++++++ examples/README.md | 1 + 3 files changed, 171 insertions(+) create mode 100644 examples/3d/many_lights.rs diff --git a/Cargo.toml b/Cargo.toml index dc3377ac8d155..abd8a3c0e17cc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -192,6 +192,10 @@ path = "examples/3d/load_gltf.rs" name = "many_cubes" path = "examples/3d/many_cubes.rs" +[[example]] +name = "many_lights" +path = "examples/3d/many_lights.rs" + [[example]] name = "msaa" path = "examples/3d/msaa.rs" diff --git a/examples/3d/many_lights.rs b/examples/3d/many_lights.rs new file mode 100644 index 0000000000000..6134b459de082 --- /dev/null +++ b/examples/3d/many_lights.rs @@ -0,0 +1,166 @@ +use bevy::{ + diagnostic::{FrameTimeDiagnosticsPlugin, LogDiagnosticsPlugin}, + math::{DVec2, DVec3}, + pbr::{ExtractedPointLight, GlobalLightMeta}, + prelude::*, + render::{RenderApp, RenderStage}, +}; + +fn main() { + App::new() + .insert_resource(WindowDescriptor { + width: 1024.0, + height: 768.0, + title: "many_lights".to_string(), + present_mode: bevy::window::PresentMode::Immediate, + ..default() + }) + .add_plugins(DefaultPlugins) + .add_plugin(FrameTimeDiagnosticsPlugin::default()) + .add_plugin(LogDiagnosticsPlugin::default()) + .add_startup_system(setup) + .add_system(move_camera) + .add_system(print_light_count) + .add_plugin(LogVisibleLights) + .run(); +} + +fn setup( + mut commands: Commands, + mut meshes: ResMut>, + mut materials: ResMut>, +) { + const LIGHT_RADIUS: f32 = 0.3; + const LIGHT_INTENSITY: f32 = 5.0; + const RADIUS: f32 = 50.0; + const N_LIGHTS: usize = 100_000; + + commands.spawn_bundle(PbrBundle { + mesh: meshes.add(Mesh::from(shape::Icosphere { + radius: RADIUS, + subdivisions: 9, + })), + material: materials.add(StandardMaterial::from(Color::WHITE)), + transform: Transform::from_scale(Vec3::splat(-1.0)), + ..default() + }); + + let mesh = meshes.add(Mesh::from(shape::Cube { size: 1.0 })); + let material = materials.add(StandardMaterial { + base_color: Color::PINK, + ..default() + }); + + // NOTE: This pattern is good for testing performance of culling as it provides roughly + // the same number of visible meshes regardless of the viewing angle. + // NOTE: f64 is used to avoid precision issues that produce visual artifacts in the distribution + let golden_ratio = 0.5f64 * (1.0f64 + 5.0f64.sqrt()); + for i in 0..N_LIGHTS { + let spherical_polar_theta_phi = fibonacci_spiral_on_sphere(golden_ratio, i, N_LIGHTS); + let unit_sphere_p = spherical_polar_to_cartesian(spherical_polar_theta_phi); + commands.spawn_bundle(PointLightBundle { + point_light: PointLight { + range: LIGHT_RADIUS, + intensity: LIGHT_INTENSITY, + ..default() + }, + transform: Transform::from_translation((RADIUS as f64 * unit_sphere_p).as_vec3()), + ..default() + }); + } + + // camera + commands.spawn_bundle(PerspectiveCameraBundle::default()); + + // add one cube, the only one with strong handles + // also serves as a reference point during rotation + commands.spawn_bundle(PbrBundle { + mesh, + material, + transform: Transform { + translation: Vec3::new(0.0, RADIUS as f32, 0.0), + scale: Vec3::splat(5.0), + ..default() + }, + ..default() + }); +} + +// NOTE: This epsilon value is apparently optimal for optimizing for the average +// nearest-neighbor distance. See: +// http://extremelearning.com.au/how-to-evenly-distribute-points-on-a-sphere-more-effectively-than-the-canonical-fibonacci-lattice/ +// for details. +const EPSILON: f64 = 0.36; +fn fibonacci_spiral_on_sphere(golden_ratio: f64, i: usize, n: usize) -> DVec2 { + DVec2::new( + 2.0 * std::f64::consts::PI * (i as f64 / golden_ratio), + (1.0 - 2.0 * (i as f64 + EPSILON) / (n as f64 - 1.0 + 2.0 * EPSILON)).acos(), + ) +} + +fn spherical_polar_to_cartesian(p: DVec2) -> DVec3 { + let (sin_theta, cos_theta) = p.x.sin_cos(); + let (sin_phi, cos_phi) = p.y.sin_cos(); + DVec3::new(cos_theta * sin_phi, sin_theta * sin_phi, cos_phi) +} + +// System for rotating the camera +fn move_camera(time: Res