diff --git a/Cargo.lock b/Cargo.lock index 4e2c5b82..c4b03805 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -657,7 +657,11 @@ version = "0.1.0" dependencies = [ "criterion", "itertools 0.14.0", + "pollster", "ranim", + "ranim-core", + "ranim-render", + "wgpu", ] [[package]] diff --git a/benches/Cargo.toml b/benches/Cargo.toml index f8e70169..ccc76b60 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -13,6 +13,10 @@ bench = false itertools.workspace = true criterion = { version = "0.8.1", features = ["html_reports"] } ranim = { path = "../", features = ["render"] } +ranim-core = { path = "../packages/ranim-core" } +ranim-render = { path = "../packages/ranim-render" } +wgpu = { workspace = true } +pollster = "0.4.0" [[bench]] name = "eval" @@ -25,3 +29,7 @@ harness = false [[bench]] name = "extract" harness = false + +[[bench]] +name = "gpu_render" +harness = false diff --git a/benches/benches/gpu_render.rs b/benches/benches/gpu_render.rs new file mode 100644 index 00000000..b50d87ef --- /dev/null +++ b/benches/benches/gpu_render.rs @@ -0,0 +1,243 @@ +//! GPU rendering benchmark — isolates the pure render_store_with_pool cost. +//! +//! Measures: +//! - CPU-side submission time (buffer upload + command encoding + queue submit) +//! - Scales with VItem count to identify bottleneck (draw calls vs SDF vs upload) + +use std::hint::black_box; + +use benches::test_scenes::static_squares; +use criterion::{BenchmarkId, Criterion, SamplingMode, criterion_group, criterion_main}; +use ranim::{SceneConstructor, prelude::*}; +use ranim_core::store::CoreItemStore; +use ranim_render::{Renderer, resource::RenderPool, utils::WgpuContext}; + +/// Pure GPU render benchmark: only measures render_store_with_pool + device.poll +fn gpu_render_benchmark(c: &mut Criterion) { + let ctx = pollster::block_on(WgpuContext::new()); + + let mut group = c.benchmark_group("gpu_render"); + group.sampling_mode(SamplingMode::Flat).sample_size(50); + + for n in [5, 10, 20, 40, 60].iter() { + let vitem_count = n * n; + + // Build the scene and eval to get a CoreItemStore + let scene = (|r: &mut RanimScene| static_squares(r, *n)).build_scene(); + let mut store = CoreItemStore::new(); + store.update(scene.eval_at_alpha(0.5)); + + let mut renderer = Renderer::new(&ctx, 1920, 1080, 8); + let mut render_textures = renderer.new_render_textures(&ctx); + let mut pool = RenderPool::new(); + let clear_color = wgpu::Color { + r: 0.2, + g: 0.2, + b: 0.2, + a: 1.0, + }; + + // Warm up: render once to initialize all GPU resources + renderer.render_store_with_pool(&ctx, &mut render_textures, clear_color, &store, &mut pool); + pool.clean(); + ctx.device + .poll(wgpu::PollType::wait_indefinitely()) + .unwrap(); + + group.bench_with_input( + BenchmarkId::new("submit", vitem_count), + &vitem_count, + |b, _| { + b.iter(|| { + renderer.render_store_with_pool( + &ctx, + &mut render_textures, + clear_color, + &store, + &mut pool, + ); + pool.clean(); + // Wait for GPU to finish so we measure actual GPU time too + ctx.device + .poll(wgpu::PollType::wait_indefinitely()) + .unwrap(); + black_box(()); + }); + }, + ); + } + + group.finish(); +} + +/// Measures just the CPU-side submission cost (no GPU wait) +fn cpu_submit_benchmark(c: &mut Criterion) { + let ctx = pollster::block_on(WgpuContext::new()); + + let mut group = c.benchmark_group("cpu_submit"); + group.sampling_mode(SamplingMode::Flat).sample_size(50); + + for n in [5, 10, 20, 40, 60].iter() { + let vitem_count = n * n; + + let scene = (|r: &mut RanimScene| static_squares(r, *n)).build_scene(); + let mut store = CoreItemStore::new(); + store.update(scene.eval_at_alpha(0.5)); + + let mut renderer = Renderer::new(&ctx, 1920, 1080, 8); + let mut render_textures = renderer.new_render_textures(&ctx); + let mut pool = RenderPool::new(); + let clear_color = wgpu::Color { + r: 0.2, + g: 0.2, + b: 0.2, + a: 1.0, + }; + + // Warm up + renderer.render_store_with_pool(&ctx, &mut render_textures, clear_color, &store, &mut pool); + pool.clean(); + ctx.device + .poll(wgpu::PollType::wait_indefinitely()) + .unwrap(); + + group.bench_with_input( + BenchmarkId::new("no_wait", vitem_count), + &vitem_count, + |b, _| { + b.iter(|| { + renderer.render_store_with_pool( + &ctx, + &mut render_textures, + clear_color, + &store, + &mut pool, + ); + pool.clean(); + // Don't wait — measures pure CPU submission overhead + black_box(()); + }); + }, + ); + } + + group.finish(); +} + +/// Merged buffer path: GPU render benchmark (with GPU wait) +fn merged_gpu_render_benchmark(c: &mut Criterion) { + let ctx = pollster::block_on(WgpuContext::new()); + + let mut group = c.benchmark_group("merged_gpu_render"); + group.sampling_mode(SamplingMode::Flat).sample_size(50); + + for n in [5, 10, 20, 40, 60].iter() { + let vitem_count = n * n; + + let scene = (|r: &mut RanimScene| static_squares(r, *n)).build_scene(); + let mut store = CoreItemStore::new(); + store.update(scene.eval_at_alpha(0.5)); + + let mut renderer = Renderer::new(&ctx, 1920, 1080, 8); + let mut render_textures = renderer.new_render_textures(&ctx); + let mut pool = RenderPool::new(); + let clear_color = wgpu::Color { + r: 0.2, + g: 0.2, + b: 0.2, + a: 1.0, + }; + + // Warm up + renderer.render_store_with_pool(&ctx, &mut render_textures, clear_color, &store, &mut pool); + pool.clean(); + ctx.device + .poll(wgpu::PollType::wait_indefinitely()) + .unwrap(); + + group.bench_with_input( + BenchmarkId::new("submit", vitem_count), + &vitem_count, + |b, _| { + b.iter(|| { + renderer.render_store_with_pool( + &ctx, + &mut render_textures, + clear_color, + &store, + &mut pool, + ); + pool.clean(); + ctx.device + .poll(wgpu::PollType::wait_indefinitely()) + .unwrap(); + black_box(()); + }); + }, + ); + } + + group.finish(); +} + +/// Merged buffer path: CPU-only submission benchmark (no GPU wait) +fn merged_cpu_submit_benchmark(c: &mut Criterion) { + let ctx = pollster::block_on(WgpuContext::new()); + + let mut group = c.benchmark_group("merged_cpu_submit"); + group.sampling_mode(SamplingMode::Flat).sample_size(50); + + for n in [5, 10, 20, 40, 60].iter() { + let vitem_count = n * n; + + let scene = (|r: &mut RanimScene| static_squares(r, *n)).build_scene(); + let mut store = CoreItemStore::new(); + store.update(scene.eval_at_alpha(0.5)); + + let mut renderer = Renderer::new(&ctx, 1920, 1080, 8); + let mut render_textures = renderer.new_render_textures(&ctx); + let mut pool = RenderPool::new(); + let clear_color = wgpu::Color { + r: 0.2, + g: 0.2, + b: 0.2, + a: 1.0, + }; + + // Warm up + renderer.render_store_with_pool(&ctx, &mut render_textures, clear_color, &store, &mut pool); + pool.clean(); + ctx.device + .poll(wgpu::PollType::wait_indefinitely()) + .unwrap(); + + group.bench_with_input( + BenchmarkId::new("no_wait", vitem_count), + &vitem_count, + |b, _| { + b.iter(|| { + renderer.render_store_with_pool( + &ctx, + &mut render_textures, + clear_color, + &store, + &mut pool, + ); + pool.clean(); + black_box(()); + }); + }, + ); + } + + group.finish(); +} + +criterion_group!( + benches, + gpu_render_benchmark, + cpu_submit_benchmark, + merged_gpu_render_benchmark, + merged_cpu_submit_benchmark +); +criterion_main!(benches); diff --git a/packages/ranim-render/src/graph/view.rs b/packages/ranim-render/src/graph/view.rs index e95b333a..c9aba857 100644 --- a/packages/ranim-render/src/graph/view.rs +++ b/packages/ranim-render/src/graph/view.rs @@ -9,6 +9,13 @@ pub use vitem_depth::*; pub mod oit_resolve; pub use oit_resolve::*; +pub mod merged_vitem_compute; +pub use merged_vitem_compute::*; +pub mod merged_vitem_depth; +pub use merged_vitem_depth::*; +pub mod merged_vitem_color; +pub use merged_vitem_color::*; + use crate::{ RenderContext, graph::{GlobalRenderNodeTrait, RenderPacketsQuery}, diff --git a/packages/ranim-render/src/graph/view/merged_vitem_color.rs b/packages/ranim-render/src/graph/view/merged_vitem_color.rs new file mode 100644 index 00000000..382afeb3 --- /dev/null +++ b/packages/ranim-render/src/graph/view/merged_vitem_color.rs @@ -0,0 +1,68 @@ +use crate::{ + RenderContext, RenderTextures, + graph::{RenderPacketsQuery, view::ViewRenderNodeTrait}, + pipelines::MergedVItemColorPipeline, + primitives::viewport::ViewportGpuPacket, +}; + +pub struct MergedVItemColorNode; + +impl ViewRenderNodeTrait for MergedVItemColorNode { + type Query = (); + + fn run( + &self, + #[cfg(not(feature = "profiling"))] encoder: &mut wgpu::CommandEncoder, + #[cfg(feature = "profiling")] encoder: &mut wgpu_profiler::Scope<'_, wgpu::CommandEncoder>, + _packets: ::Output<'_>, + ctx: RenderContext, + viewport: &ViewportGpuPacket, + ) { + let Some(merged) = ctx.merged_buffer else { + return; + }; + if merged.item_count() == 0 { + return; + } + + let RenderTextures { + render_view, + depth_stencil_view, + .. + } = ctx.render_textures; + let rpass_desc = wgpu::RenderPassDescriptor { + label: Some("Merged VItem Color Render Pass"), + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: render_view, + resolve_target: None, + depth_slice: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Load, + store: wgpu::StoreOp::Store, + }, + })], + depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { + view: depth_stencil_view, + depth_ops: Some(wgpu::Operations { + load: wgpu::LoadOp::Load, + store: wgpu::StoreOp::Store, + }), + stencil_ops: None, + }), + timestamp_writes: None, + occlusion_query_set: None, + }; + #[cfg(feature = "profiling")] + let mut rpass = encoder.scoped_render_pass("Merged VItem Color Render Pass", rpass_desc); + #[cfg(not(feature = "profiling"))] + let mut rpass = encoder.begin_render_pass(&rpass_desc); + rpass.set_pipeline( + &ctx.pipelines + .get_or_init::(ctx.wgpu_ctx), + ); + rpass.set_bind_group(0, &ctx.resolution_info.bind_group, &[]); + rpass.set_bind_group(1, &viewport.uniforms_bind_group.bind_group, &[]); + rpass.set_bind_group(2, merged.render_bind_group.as_ref().unwrap(), &[]); + rpass.draw(0..4, 0..merged.item_count()); + } +} diff --git a/packages/ranim-render/src/graph/view/merged_vitem_compute.rs b/packages/ranim-render/src/graph/view/merged_vitem_compute.rs new file mode 100644 index 00000000..78de37bd --- /dev/null +++ b/packages/ranim-render/src/graph/view/merged_vitem_compute.rs @@ -0,0 +1,47 @@ +use crate::{ + RenderContext, + graph::{RenderPacketsQuery, view::ViewRenderNodeTrait}, + pipelines::MergedVItemComputePipeline, + primitives::viewport::ViewportGpuPacket, +}; + +pub struct MergedVItemComputeNode; + +impl ViewRenderNodeTrait for MergedVItemComputeNode { + type Query = (); + + fn run( + &self, + #[cfg(not(feature = "profiling"))] encoder: &mut wgpu::CommandEncoder, + #[cfg(feature = "profiling")] encoder: &mut wgpu_profiler::Scope<'_, wgpu::CommandEncoder>, + _packets: ::Output<'_>, + ctx: RenderContext, + _viewport: &ViewportGpuPacket, + ) { + let Some(merged) = ctx.merged_buffer else { + return; + }; + if merged.item_count() == 0 { + return; + } + + #[cfg(feature = "profiling")] + let mut encoder = encoder.scope("Merged Compute Pass"); + + { + #[cfg(feature = "profiling")] + let mut cpass = encoder.scoped_compute_pass("Merged VItem Map Points Compute Pass"); + #[cfg(not(feature = "profiling"))] + let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some("Merged VItem Map Points Compute Pass"), + timestamp_writes: None, + }); + cpass.set_pipeline( + &ctx.pipelines + .get_or_init::(ctx.wgpu_ctx), + ); + cpass.set_bind_group(0, merged.compute_bind_group.as_ref().unwrap(), &[]); + cpass.dispatch_workgroups(merged.total_points().div_ceil(256), 1, 1); + } + } +} diff --git a/packages/ranim-render/src/graph/view/merged_vitem_depth.rs b/packages/ranim-render/src/graph/view/merged_vitem_depth.rs new file mode 100644 index 00000000..cb7ca637 --- /dev/null +++ b/packages/ranim-render/src/graph/view/merged_vitem_depth.rs @@ -0,0 +1,64 @@ +use crate::{ + RenderContext, RenderTextures, + graph::{RenderPacketsQuery, view::ViewRenderNodeTrait}, + pipelines::MergedVItemDepthPipeline, + primitives::viewport::ViewportGpuPacket, +}; + +pub struct MergedVItemDepthNode; + +impl ViewRenderNodeTrait for MergedVItemDepthNode { + type Query = (); + + fn run( + &self, + #[cfg(not(feature = "profiling"))] encoder: &mut wgpu::CommandEncoder, + #[cfg(feature = "profiling")] encoder: &mut wgpu_profiler::Scope<'_, wgpu::CommandEncoder>, + _packets: ::Output<'_>, + ctx: RenderContext, + viewport: &ViewportGpuPacket, + ) { + let Some(merged) = ctx.merged_buffer else { + return; + }; + if merged.item_count() == 0 { + return; + } + + #[cfg(feature = "profiling")] + let mut encoder = encoder.scope("Merged Depth Render Pass"); + + { + let RenderTextures { + depth_stencil_view, .. + } = ctx.render_textures; + let rpass_desc = wgpu::RenderPassDescriptor { + label: Some("Merged VItem Depth Render Pass"), + color_attachments: &[], + depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { + view: depth_stencil_view, + depth_ops: Some(wgpu::Operations { + load: wgpu::LoadOp::Load, + store: wgpu::StoreOp::Store, + }), + stencil_ops: None, + }), + timestamp_writes: None, + occlusion_query_set: None, + }; + #[cfg(feature = "profiling")] + let mut rpass = + encoder.scoped_render_pass("Merged VItem Depth Render Pass", rpass_desc); + #[cfg(not(feature = "profiling"))] + let mut rpass = encoder.begin_render_pass(&rpass_desc); + rpass.set_pipeline( + &ctx.pipelines + .get_or_init::(ctx.wgpu_ctx), + ); + rpass.set_bind_group(0, &ctx.resolution_info.bind_group, &[]); + rpass.set_bind_group(1, &viewport.uniforms_bind_group.bind_group, &[]); + rpass.set_bind_group(2, merged.render_bind_group.as_ref().unwrap(), &[]); + rpass.draw(0..4, 0..merged.item_count()); + } + } +} diff --git a/packages/ranim-render/src/lib.rs b/packages/ranim-render/src/lib.rs index 812b2500..f13595b8 100644 --- a/packages/ranim-render/src/lib.rs +++ b/packages/ranim-render/src/lib.rs @@ -20,7 +20,7 @@ use glam::{UVec3, uvec3}; use crate::{ graph::{AnyGlobalRenderNodeTrait, GlobalRenderGraph, RenderPackets}, - primitives::viewport::ViewportUniform, + primitives::{merged_vitem::MergedVItemBuffer, viewport::ViewportUniform}, resource::{PipelinesPool, RenderPool, RenderTextures}, utils::{WgpuBuffer, WgpuVecBuffer}, }; @@ -91,6 +91,8 @@ pub struct RenderContext<'a> { pub wgpu_ctx: &'a WgpuContext, pub resolution_info: &'a ResolutionInfo, pub clear_color: wgpu::Color, + /// Present when using the merged rendering path. + pub merged_buffer: Option<&'a MergedVItemBuffer>, } // MARK: Renderer @@ -102,6 +104,9 @@ pub struct Renderer { packets: RenderPackets, render_graph: GlobalRenderGraph, + /// Present when using the merged rendering path (lazily initialized on first use). + merged_buffer: Option, + #[cfg(feature = "profiling")] pub(crate) profiler: wgpu_profiler::GpuProfiler, } @@ -119,7 +124,65 @@ impl Renderer { self.width as f32 / self.height as f32 } + #[allow(unused)] + #[deprecated(note = "will be replaced by the GPU-driven one instead.")] + fn build_render_graph() -> GlobalRenderGraph { + use graph::*; + let mut render_graph = GlobalRenderGraph::new(); + let clear = render_graph.insert_node(ClearNode); + let view_render = render_graph.insert_node({ + use graph::view::*; + let mut render_graph = ViewRenderGraph::new(); + let vitem_compute = render_graph.insert_node(VItemComputeNode); + let vitem2d_depth = render_graph.insert_node(VItemDepthNode); + let vitem2d_render = render_graph.insert_node(VItemColorNode); + let oit_resolve = render_graph.insert_node(OITResolveNode); + render_graph.insert_edge(vitem_compute, vitem2d_depth); + render_graph.insert_edge(vitem2d_depth, vitem2d_render); + render_graph.insert_edge(vitem2d_render, oit_resolve); + render_graph + }); + render_graph.insert_edge(clear, view_render); + render_graph + } + + fn build_merged_render_graph() -> GlobalRenderGraph { + use graph::*; + let mut render_graph = GlobalRenderGraph::new(); + let clear = render_graph.insert_node(ClearNode); + let view_render = render_graph.insert_node({ + use graph::view::*; + let mut render_graph = ViewRenderGraph::new(); + let compute = render_graph.insert_node(MergedVItemComputeNode); + let depth = render_graph.insert_node(MergedVItemDepthNode); + let color = render_graph.insert_node(MergedVItemColorNode); + let oit_resolve = render_graph.insert_node(OITResolveNode); + render_graph.insert_edge(compute, depth); + render_graph.insert_edge(depth, color); + render_graph.insert_edge(color, oit_resolve); + render_graph + }); + render_graph.insert_edge(clear, view_render); + render_graph + } + pub fn new(ctx: &WgpuContext, width: u32, height: u32, oit_layers: usize) -> Self { + Self::new_with_graph( + ctx, + width, + height, + oit_layers, + Self::build_merged_render_graph(), + ) + } + + fn new_with_graph( + ctx: &WgpuContext, + width: u32, + height: u32, + oit_layers: usize, + render_graph: GlobalRenderGraph, + ) -> Self { let resolution_info = ResolutionInfo::new(ctx, width, height, oit_layers); #[cfg(feature = "profiling")] @@ -129,27 +192,6 @@ impl Renderer { ) .unwrap(); - let mut render_graph = GlobalRenderGraph::new(); - { - use graph::*; - // Global Render Nodes that executes per-frame - let clear = render_graph.insert_node(ClearNode); - let view_render = render_graph.insert_node({ - use graph::view::*; - // View Render Nodes that executes per-viewport in every frame - let mut render_graph = ViewRenderGraph::new(); - let vitem_compute = render_graph.insert_node(VItemComputeNode); - let vitem2d_depth = render_graph.insert_node(VItemDepthNode); - let vitem2d_render = render_graph.insert_node(VItemColorNode); - let oit_resolve = render_graph.insert_node(OITResolveNode); - render_graph.insert_edge(vitem_compute, vitem2d_depth); - render_graph.insert_edge(vitem2d_depth, vitem2d_render); - render_graph.insert_edge(vitem2d_render, oit_resolve); - render_graph - }); - render_graph.insert_edge(clear, view_render); - } - Self { width, height, @@ -157,7 +199,7 @@ impl Renderer { pipelines: PipelinesPool::default(), packets: RenderPackets::default(), render_graph, - // Profiler + merged_buffer: None, #[cfg(feature = "profiling")] profiler, } @@ -167,6 +209,7 @@ impl Renderer { RenderTextures::new(ctx, self.width, self.height) } + /// Render a frame. Pushes viewport + VItem packets via pool, then execs the render graph. pub fn render_store_with_pool( &mut self, ctx: &WgpuContext, @@ -175,10 +218,12 @@ impl Renderer { store: &CoreItemStore, pool: &mut RenderPool, ) { + // Viewport — always needed let (_id, camera_frame) = &store.camera_frames[0]; let viewport = ViewportUniform::from_camera_frame(camera_frame, self.width, self.height); - self.packets.push(pool.alloc_packet(ctx, &viewport)); + + // Per-VItem packets (old path nodes query these; merged nodes ignore them) self.packets.extend( store .vitems @@ -186,6 +231,13 @@ impl Renderer { .map(|(_id, data)| pool.alloc_packet(ctx, data)), ); + // Merged buffer (merged nodes read this; old nodes ignore it) + let merged = self + .merged_buffer + .get_or_insert_with(|| MergedVItemBuffer::new(ctx)); + merged.update(ctx, &store.vitems); + + // Encode & submit { #[cfg(feature = "profiling")] profiling::scope!("render"); @@ -198,7 +250,7 @@ impl Renderer { #[cfg(feature = "profiling")] let mut scope = self.profiler.scope("render", &mut encoder); - let ctx = RenderContext { + let render_ctx = RenderContext { pipelines: &self.pipelines, render_textures, render_packets: &self.packets, @@ -206,6 +258,7 @@ impl Renderer { wgpu_ctx: ctx, resolution_info: &self.resolution_info, clear_color, + merged_buffer: self.merged_buffer.as_ref(), }; self.render_graph.exec( @@ -213,7 +266,7 @@ impl Renderer { &mut encoder, #[cfg(feature = "profiling")] &mut scope, - ctx, + render_ctx, ); } @@ -228,19 +281,14 @@ impl Renderer { ctx.queue.submit(Some(encoder.finish())); } - // renderable.debug(ctx); - - // Signal to the profiler that the frame is finished. self.profiler.end_frame().unwrap(); - // Query for oldest finished frame (this is almost certainly not the one we just submitted!) and display results in the command line. ctx.device .poll(wgpu::PollType::wait_indefinitely()) .unwrap(); let latest_profiler_results = self .profiler .process_finished_frame(ctx.queue.get_timestamp_period()); - // profiling_utils::console_output(&latest_profiler_results, ctx.wgpu_ctx.device.features()); let mut gpu_profiler = PUFFIN_GPU_PROFILER.lock().unwrap(); wgpu_profiler::puffin::output_frame_to_puffin( &mut gpu_profiler, diff --git a/packages/ranim-render/src/pipelines/merged_vitem.rs b/packages/ranim-render/src/pipelines/merged_vitem.rs new file mode 100644 index 00000000..6d26503c --- /dev/null +++ b/packages/ranim-render/src/pipelines/merged_vitem.rs @@ -0,0 +1,188 @@ +use std::ops::Deref; + +use crate::{ + ResolutionInfo, WgpuContext, + primitives::{merged_vitem::MergedVItemBuffer, viewport::ViewportBindGroup}, + resource::{GpuResource, OUTPUT_TEXTURE_FORMAT}, +}; + +// MARK: Compute pipeline + +pub struct MergedVItemComputePipeline { + pipeline: wgpu::ComputePipeline, +} + +impl Deref for MergedVItemComputePipeline { + type Target = wgpu::ComputePipeline; + fn deref(&self) -> &Self::Target { + &self.pipeline + } +} + +impl GpuResource for MergedVItemComputePipeline { + fn new(ctx: &WgpuContext) -> Self { + let module = &ctx + .device + .create_shader_module(wgpu::include_wgsl!("./shaders/merged_vitem_compute.wgsl")); + let layout = ctx + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("Merged VItem Compute Pipeline Layout"), + bind_group_layouts: &[&MergedVItemBuffer::compute_bind_group_layout(ctx)], + push_constant_ranges: &[], + }); + let pipeline = ctx + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("Merged VItem Compute Pipeline"), + layout: Some(&layout), + module, + entry_point: Some("cs_main"), + compilation_options: wgpu::PipelineCompilationOptions::default(), + cache: None, + }); + Self { pipeline } + } +} + +// MARK: Color pipeline + +pub struct MergedVItemColorPipeline { + pipeline: wgpu::RenderPipeline, +} + +impl Deref for MergedVItemColorPipeline { + type Target = wgpu::RenderPipeline; + fn deref(&self) -> &Self::Target { + &self.pipeline + } +} + +impl GpuResource for MergedVItemColorPipeline { + fn new(ctx: &WgpuContext) -> Self { + let module = &ctx + .device + .create_shader_module(wgpu::include_wgsl!("./shaders/merged_vitem.wgsl")); + let layout = ctx + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("Merged VItem Color Pipeline Layout"), + bind_group_layouts: &[ + &ResolutionInfo::create_bind_group_layout(ctx), + &ViewportBindGroup::bind_group_layout(ctx), + &MergedVItemBuffer::render_bind_group_layout(ctx), + ], + push_constant_ranges: &[], + }); + let pipeline = ctx + .device + .create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("Merged VItem Color Pipeline"), + layout: Some(&layout), + vertex: wgpu::VertexState { + module, + entry_point: Some("vs_main"), + buffers: &[], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }, + fragment: Some(wgpu::FragmentState { + module, + entry_point: Some("fs_main"), + compilation_options: wgpu::PipelineCompilationOptions::default(), + targets: &[Some(wgpu::ColorTargetState { + format: OUTPUT_TEXTURE_FORMAT, + blend: Some(wgpu::BlendState::ALPHA_BLENDING), + write_mask: wgpu::ColorWrites::ALL, + })], + }), + primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleStrip, + ..Default::default() + }, + depth_stencil: Some(wgpu::DepthStencilState { + format: wgpu::TextureFormat::Depth32Float, + depth_write_enabled: false, + depth_compare: wgpu::CompareFunction::LessEqual, + stencil: wgpu::StencilState::default(), + bias: wgpu::DepthBiasState::default(), + }), + multisample: wgpu::MultisampleState { + count: 1, + mask: !0, + alpha_to_coverage_enabled: false, + }, + multiview: None, + cache: None, + }); + Self { pipeline } + } +} + +// MARK: Depth pipeline + +pub struct MergedVItemDepthPipeline { + pipeline: wgpu::RenderPipeline, +} + +impl Deref for MergedVItemDepthPipeline { + type Target = wgpu::RenderPipeline; + fn deref(&self) -> &Self::Target { + &self.pipeline + } +} + +impl GpuResource for MergedVItemDepthPipeline { + fn new(ctx: &WgpuContext) -> Self { + let module = &ctx + .device + .create_shader_module(wgpu::include_wgsl!("./shaders/merged_vitem.wgsl")); + let layout = ctx + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("Merged VItem Depth Pipeline Layout"), + bind_group_layouts: &[ + &ResolutionInfo::create_bind_group_layout(ctx), + &ViewportBindGroup::bind_group_layout(ctx), + &MergedVItemBuffer::render_bind_group_layout(ctx), + ], + push_constant_ranges: &[], + }); + let pipeline = ctx + .device + .create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("Merged VItem Depth Pipeline"), + layout: Some(&layout), + vertex: wgpu::VertexState { + module, + entry_point: Some("vs_main"), + buffers: &[], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }, + fragment: Some(wgpu::FragmentState { + module, + entry_point: Some("fs_depth_only"), + compilation_options: wgpu::PipelineCompilationOptions::default(), + targets: &[], + }), + primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleStrip, + ..Default::default() + }, + depth_stencil: Some(wgpu::DepthStencilState { + format: wgpu::TextureFormat::Depth32Float, + depth_write_enabled: true, + depth_compare: wgpu::CompareFunction::Less, + stencil: wgpu::StencilState::default(), + bias: wgpu::DepthBiasState::default(), + }), + multisample: wgpu::MultisampleState { + count: 1, + mask: !0, + alpha_to_coverage_enabled: false, + }, + multiview: None, + cache: None, + }); + Self { pipeline } + } +} diff --git a/packages/ranim-render/src/pipelines/mod.rs b/packages/ranim-render/src/pipelines/mod.rs index 8322482d..51a15584 100644 --- a/packages/ranim-render/src/pipelines/mod.rs +++ b/packages/ranim-render/src/pipelines/mod.rs @@ -1,9 +1,13 @@ //! The pipelines of ranim pub mod debug; +pub mod merged_vitem; pub mod oit_resolve; pub mod vitem; pub mod vitem_compute; +pub use merged_vitem::{ + MergedVItemColorPipeline, MergedVItemComputePipeline, MergedVItemDepthPipeline, +}; pub use oit_resolve::OITResolvePipeline; pub use vitem::{VItemColorPipeline, VItemDepthPipeline}; pub use vitem_compute::VItemComputePipeline; diff --git a/packages/ranim-render/src/pipelines/shaders/merged_vitem.wgsl b/packages/ranim-render/src/pipelines/shaders/merged_vitem.wgsl new file mode 100644 index 00000000..fd8c98b2 --- /dev/null +++ b/packages/ranim-render/src/pipelines/shaders/merged_vitem.wgsl @@ -0,0 +1,349 @@ +// === Shared bindings (group 0: resolution/OIT, group 1: camera) === + +@group(0) @binding(0) var frame: vec3; +@group(0) @binding(1) var pixel_count: array>; +@group(0) @binding(2) var oit_colors: array; +@group(0) @binding(3) var oit_depths: array; + +struct CameraUniforms { + proj_mat: mat4x4, + view_mat: mat4x4, + half_frame_size: vec2, +} +@group(1) @binding(0) var cam_uniforms: CameraUniforms; + +// === Merged VItem data (group 2) === + +struct ItemInfo { + point_offset: u32, + point_count: u32, + attr_offset: u32, + attr_count: u32, +} + +struct PlaneData { + origin: vec4, + basis_u: vec4, + basis_v: vec4, +} + +@group(2) @binding(0) var item_infos: array; +@group(2) @binding(1) var planes: array; +// clip_boxes: 5 i32 per item [min_x, max_x, min_y, max_y, max_w] +@group(2) @binding(2) var clip_boxes: array; +@group(2) @binding(3) var points: array>; +@group(2) @binding(4) var fill_rgbas: array>; +@group(2) @binding(5) var stroke_rgbas: array>; +@group(2) @binding(6) var stroke_widths: array; + +// === Per-instance data passed from vertex to fragment === + +struct VertexOutput { + @builtin(position) frag_pos: vec4, + @location(0) pos: vec2, + @location(1) @interpolate(flat) instance_id: u32, +} + +// === Helper: access item's point/attr data === + +fn item_point(info: ItemInfo, local_idx: u32) -> vec2 { + return points[info.point_offset + local_idx].xy; +} + +fn item_is_closed(info: ItemInfo, local_idx: u32) -> bool { + return bool(points[info.point_offset + local_idx].z); +} + +fn item_fill_rgba(info: ItemInfo, anchor_idx: u32) -> vec4 { + return fill_rgbas[info.attr_offset + anchor_idx]; +} + +fn item_stroke_rgba(info: ItemInfo, anchor_idx: u32) -> vec4 { + return stroke_rgbas[info.attr_offset + anchor_idx]; +} + +fn item_stroke_width(info: ItemInfo, anchor_idx: u32) -> f32 { + return stroke_widths[info.attr_offset + anchor_idx]; +} + +// === SDF math (same as original) === + +fn pack_color(color: vec4) -> u32 { + let c = vec4(color * 255.0); + return (c.r) | (c.g << 8u) | (c.b << 16u) | (c.a << 24u); +} + +fn cross_2d(a: vec2, b: vec2) -> f32 { + return a.x * b.y - a.y * b.x; +} + +fn blend_color(f: vec4, b: vec4) -> vec4 { + let a = f.a + b.a * (1.0 - f.a); + return vec4( + f.r * f.a + b.r * b.a * (1.0 - f.a) / a, + f.g * f.a + b.g * b.a * (1.0 - f.a) / a, + f.b * f.a + b.b * b.a * (1.0 - f.a) / a, + a + ); +} + +fn solve_cubic(a: f32, b: f32, c: f32) -> vec3 { + let p = b - a * a / 3.0; + let p3 = p * p * p; + let q = a * (2.0 * a * a - 9.0 * b) / 27.0 + c; + let d = q * q + 4.0 * p3 / 27.0; + let offset = -a / 3.0; + if (d >= 0.0) { + let z = sqrt(d); + let x = (vec2(z, -z) - q) / 2.0; + let uv = sign(x) * pow(abs(x), vec2(1.0 / 3.0)); + return vec3(offset + uv.x + uv.y); + } + let v = acos(-sqrt(-27.0 / p3) * q / 2.0) / 3.0; + let m = cos(v); + let n = sin(v) * 1.732050808; + return vec3(m + m, -n - m, n - m) * sqrt(-p / 3.0) + offset; +} + +fn distance_bezier(pos: vec2, A: vec2, _B: vec2, C: vec2) -> f32 { + var B = mix(_B + vec2(1e-4), _B, abs(sign(_B * 2.0 - A - C))); + let a = B - A; + let b = A - B * 2.0 + C; + let c = a * 2.0; + let d = A - pos; + let k = vec3(3.0 * dot(a, b), 2.0 * dot(a, a) + dot(d, b), dot(d, a)) / dot(b, b); + let solved = solve_cubic(k.x, k.y, k.z); + let t = vec3( + clamp(solved.x, 0.0, 1.0), + clamp(solved.y, 0.0, 1.0), + clamp(solved.z, 0.0, 1.0), + ); + var ppos = A + (c + b * t.x) * t.x; + var dis = length(ppos - pos); + ppos = A + (c + b * t.y) * t.y; + dis = min(dis, length(ppos - pos)); + ppos = A + (c + b * t.z) * t.z; + dis = min(dis, length(ppos - pos)); + return dis; +} + +fn distance_line(pos: vec2, A: vec2, B: vec2) -> f32 { + let e = B - A; + let w = pos - A; + let b = w - e * clamp(dot(w, e) / dot(e, e), 0.0, 1.0); + return length(b); +} + +fn sign_bezier(p: vec2, A: vec2, B: vec2, C: vec2) -> f32 { + let a: vec2 = C - A; + let b: vec2 = B - A; + let c: vec2 = p - A; + let denominator: f32 = a.x * b.y - b.x * a.y; + let bary: vec2 = vec2(cross_2d(c, b), cross_2d(a, c)) / denominator; + let d: vec2 = vec2(bary.y * 0.5, 0.0) + 1.0 - bary.x - bary.y; + let sign_inside: f32 = select(1.0, sign(d.x * d.x - d.y), d.x > d.y); + let sign_left: f32 = sign_line(p, A, C); + return sign_inside * sign_left; +} + +fn sign_line(p: vec2, A: vec2, B: vec2) -> f32 { + let cond: vec3 = vec3( + (p.y >= A.y), + (p.y < B.y), + (cross_2d(B - A, p - A) > 0.0) + ); + return select(1.0, -1.0, all(cond) || !any(cond)); +} + +// === SDF rendering (adapted for merged buffers) === + +struct SubpathAttr { + end_idx: u32, + nearest_idx: u32, + d: f32, + sgn: f32, +} + +fn get_subpath_attr(pos: vec2, info: ItemInfo, start_local_idx: u32) -> SubpathAttr { + var attr: SubpathAttr; + attr.end_idx = info.point_count; + attr.nearest_idx = 0u; + attr.d = 3.40282346638528859812e38; + attr.sgn = 1.0; + + let n = (info.point_count - 1u) / 2u * 2u; + for (var i = start_local_idx; i < n; i += 2u) { + let a = item_point(info, i); + let b = item_point(info, i + 1u); + let c = item_point(info, i + 2u); + if length(b - a) == 0.0 { + attr.end_idx = i; + break; + } + + let v1 = normalize(b - a); + let v2 = normalize(c - b); + let is_line = abs(cross_2d(v1, v2)) < 0.0001 && dot(v1, v2) > 0.0; + let dist = select(distance_bezier(pos, a, b, c), distance_line(pos, a, c), is_line); + if dist < attr.d { + attr.d = dist; + attr.nearest_idx = i; + } + if item_is_closed(info, i) { + attr.sgn *= select(sign_bezier(pos, a, b, c), sign_line(pos, a, c), is_line); + } + } + + return attr; +} + +fn render(pos: vec2, info: ItemInfo) -> vec4 { + var idx = 0u; + var d = 3.40282346638528859812e38; + var sgn = 1.0; + + var start_idx = 0u; + while start_idx < info.point_count { + let attr = get_subpath_attr(pos, info, start_idx); + if attr.d < d { + idx = attr.nearest_idx; + d = attr.d; + } + sgn *= attr.sgn; + start_idx = attr.end_idx + 2u; + } + + let sgn_d = sgn * d; + + let e = item_point(info, idx + 1u) - item_point(info, idx); + let w = pos - item_point(info, idx); + let ratio = clamp(dot(w, e) / dot(e, e), 0.0, 1.0); + let anchor_index = idx / 2u; + + let antialias_radius = 0.015 / 4.0; + + var fill_rgba: vec4 = select( + vec4(0.0), + mix(item_fill_rgba(info, anchor_index), item_fill_rgba(info, anchor_index + 1u), ratio), + item_is_closed(info, idx) + ); + fill_rgba.a *= smoothstep(1.0, -1.0, (sgn_d) / antialias_radius); + + var stroke_width = mix( + item_stroke_width(info, anchor_index), + item_stroke_width(info, anchor_index + 1u), + ratio + ); + var stroke_rgba: vec4 = mix( + item_stroke_rgba(info, anchor_index), + item_stroke_rgba(info, anchor_index + 1u), + ratio + ); + stroke_rgba.a *= smoothstep(1.0, -1.0, (d - stroke_width) / antialias_radius); + + var f_color = blend_color(stroke_rgba, fill_rgba); + + if (f_color.a < 0.01) { + discard; + } + + return f_color; +} + +// === Fragment shaders === + +struct FragmentOutput { + @location(0) color: vec4, + @builtin(frag_depth) depth: f32, +} + +@fragment +fn fs_main( + @builtin(position) frag_pos: vec4, + @location(0) pos: vec2, + @location(1) @interpolate(flat) instance_id: u32, +) -> FragmentOutput { + var out: FragmentOutput; + let info = item_infos[instance_id]; + let color = render(pos, info); + + if (color.a >= 0.99) { + out.color = color; + out.depth = frag_pos.z; + return out; + } + + let coords = vec2(floor(frag_pos.xy)); + let pixel_idx = coords.y * frame.x + coords.x; + let layer_idx = atomicAdd(&pixel_count[pixel_idx], 1u); + + if (layer_idx < frame.z) { + let buffer_idx = pixel_idx * frame.z + layer_idx; + oit_colors[buffer_idx] = pack_color(color); + oit_depths[buffer_idx] = frag_pos.z; + } + + discard; + out.color = vec4(0.0, 0.0, 0.0, 0.0); + out.depth = 1.0; + return out; +} + +@fragment +fn fs_depth_only( + @builtin(position) frag_pos: vec4, + @location(0) pos: vec2, + @location(1) @interpolate(flat) instance_id: u32, +) -> @builtin(frag_depth) f32 { + let info = item_infos[instance_id]; + let color = render(pos, info); + + if (color.a < 0.99) { + discard; + } + + return frag_pos.z; +} + +// === Vertex shader === + +@vertex +fn vs_main( + @builtin(vertex_index) vertex_index: u32, + @builtin(instance_index) instance_index: u32, +) -> VertexOutput { + var out: VertexOutput; + + let info = item_infos[instance_index]; + let plane = planes[instance_index]; + let clip_base = instance_index * 5u; + + let scale = 1000.0; + let min_x = f32(clip_boxes[clip_base + 0u]) / scale; + let max_x = f32(clip_boxes[clip_base + 1u]) / scale; + let min_y = f32(clip_boxes[clip_base + 2u]) / scale; + let max_y = f32(clip_boxes[clip_base + 3u]) / scale; + let max_w = f32(clip_boxes[clip_base + 4u]) / scale; + + var clip_point: vec2; + clip_point.x = select( + max_x + max_w, + min_x - max_w, + (vertex_index & 2u) == 0u + ); + clip_point.y = select( + max_y + max_w, + min_y - max_w, + (vertex_index & 1u) == 0u + ); + + let u = clip_point.x; + let v = clip_point.y; + + let pos3d = plane.origin.xyz + u * plane.basis_u.xyz + v * plane.basis_v.xyz; + + out.frag_pos = cam_uniforms.proj_mat * cam_uniforms.view_mat * vec4(pos3d, 1.0); + out.pos = clip_point; + out.instance_id = instance_index; + return out; +} diff --git a/packages/ranim-render/src/pipelines/shaders/merged_vitem_compute.wgsl b/packages/ranim-render/src/pipelines/shaders/merged_vitem_compute.wgsl new file mode 100644 index 00000000..9d4d2ba5 --- /dev/null +++ b/packages/ranim-render/src/pipelines/shaders/merged_vitem_compute.wgsl @@ -0,0 +1,94 @@ +// Per-item metadata +struct ItemInfo { + point_offset: u32, + point_count: u32, + attr_offset: u32, + attr_count: u32, +} + +struct Plane { + origin: vec3, + basis_u: vec3, + basis_v: vec3, +} + +// Padded version matching the Rust repr +struct PlaneData { + origin: vec4, + basis_u: vec4, + basis_v: vec4, +} + +struct ClipBox { + min_x: atomic, + max_x: atomic, + min_y: atomic, + max_y: atomic, + max_w: atomic, +} + +@group(0) @binding(0) var item_infos: array; +@group(0) @binding(1) var planes: array; +@group(0) @binding(2) var points3d: array>; +@group(0) @binding(3) var stroke_widths: array; +@group(0) @binding(4) var points2d: array>; +// clip_boxes: 5 i32 per item, laid out as [min_x, max_x, min_y, max_y, max_w, ...] +@group(0) @binding(5) var clip_boxes: array>; + +@compute +@workgroup_size(256) +fn cs_main( + @builtin(global_invocation_id) global_invocation_id: vec3, +) { + let total_points = arrayLength(&points3d); + let index = global_invocation_id.x; + if index >= total_points { + return; + } + + // Binary search to find which item this point belongs to + let item_count = arrayLength(&item_infos); + var lo = 0u; + var hi = item_count; + while lo < hi { + let mid = (lo + hi) / 2u; + let info = item_infos[mid]; + if index < info.point_offset { + hi = mid; + } else if index >= info.point_offset + info.point_count { + lo = mid + 1u; + } else { + lo = mid; + break; + } + } + let item_idx = lo; + let info = item_infos[item_idx]; + let plane_data = planes[item_idx]; + + let plane_origin = plane_data.origin.xyz; + let plane_basis_u = plane_data.basis_u.xyz; + let plane_basis_v = plane_data.basis_v.xyz; + + let p_vec = points3d[index]; + let p = p_vec.xyz; + let is_closed = p_vec.w; + let diff = p - plane_origin; + + let x = dot(diff, plane_basis_u); + let y = dot(diff, plane_basis_v); + + // Local index within this item's points + let local_idx = index - info.point_offset; + let w = stroke_widths[info.attr_offset + local_idx / 2u]; + + points2d[index] = vec4(x, y, is_closed, 0.0); + + let scale = 1000.0; + let clip_base = item_idx * 5u; + atomicMin(&clip_boxes[clip_base + 0u], i32(floor(x * scale))); + atomicMax(&clip_boxes[clip_base + 1u], i32(ceil(x * scale))); + atomicMin(&clip_boxes[clip_base + 2u], i32(floor(y * scale))); + atomicMax(&clip_boxes[clip_base + 3u], i32(ceil(y * scale))); + atomicMax(&clip_boxes[clip_base + 4u], i32(ceil(w * scale))); +} diff --git a/packages/ranim-render/src/primitives.rs b/packages/ranim-render/src/primitives.rs index a0f8ceb9..00ce0349 100644 --- a/packages/ranim-render/src/primitives.rs +++ b/packages/ranim-render/src/primitives.rs @@ -1,3 +1,4 @@ +pub mod merged_vitem; pub mod viewport; pub mod vitem; diff --git a/packages/ranim-render/src/primitives/merged_vitem.rs b/packages/ranim-render/src/primitives/merged_vitem.rs new file mode 100644 index 00000000..1162bb80 --- /dev/null +++ b/packages/ranim-render/src/primitives/merged_vitem.rs @@ -0,0 +1,290 @@ +use crate::utils::{WgpuContext, WgpuVecBuffer}; +use bytemuck::{Pod, Zeroable}; +use glam::Vec4; +use ranim_core::{ + components::{rgba::Rgba, width::Width}, + core_item::vitem::VItem, +}; + +/// Per-item metadata stored in a GPU buffer. +/// Tells shaders where each VItem's data lives in the merged buffers. +#[repr(C)] +#[derive(Debug, Default, Clone, Copy, Pod, Zeroable)] +pub struct ItemInfo { + /// Offset into the merged points buffer + pub point_offset: u32, + /// Number of points for this item + pub point_count: u32, + /// Offset into the merged attribute buffers (fill_rgbas, stroke_rgbas, stroke_widths) + pub attr_offset: u32, + /// Number of attributes (= point_count.div_ceil(2)) + pub attr_count: u32, +} + +/// Per-item plane data (origin + basis), stored as array of structs. +#[repr(C)] +#[derive(Debug, Default, Clone, Copy, Pod, Zeroable)] +pub struct PlaneData { + pub origin: Vec4, // xyz, w=pad + pub basis_u: Vec4, // xyz, w=pad + pub basis_v: Vec4, // xyz, w=pad +} + +/// Merged GPU buffers for all VItems in a frame. +/// +/// Instead of one set of buffers per VItem, all data is packed into +/// contiguous arrays with an index table (`item_infos`) that tells +/// shaders where each item's data lives. +pub struct MergedVItemBuffer { + /// Per-item metadata: offsets and counts + pub(crate) item_infos_buffer: WgpuVecBuffer, + /// Per-item plane data (origin + basis) + pub(crate) planes_buffer: WgpuVecBuffer, + /// Per-item clip boxes (5 i32 each: min_x, max_x, min_y, max_y, max_w) + pub(crate) clip_boxes_buffer: WgpuVecBuffer, + + /// Merged 3D points from all VItems + pub(crate) points3d_buffer: WgpuVecBuffer, + /// Merged 2D projected points (written by compute shader) + pub(crate) points2d_buffer: WgpuVecBuffer, + /// Merged fill colors + pub(crate) fill_rgbas_buffer: WgpuVecBuffer, + /// Merged stroke colors + pub(crate) stroke_rgbas_buffer: WgpuVecBuffer, + /// Merged stroke widths + pub(crate) stroke_widths_buffer: WgpuVecBuffer, + + /// Number of items + pub(crate) item_count: u32, + /// Total number of points across all items + pub(crate) total_points: u32, + + /// Compute bind group (recreated when buffers resize) + pub(crate) compute_bind_group: Option, + /// Render bind group (recreated when buffers resize) + pub(crate) render_bind_group: Option, +} + +impl MergedVItemBuffer { + pub fn new(ctx: &WgpuContext) -> Self { + // Start with empty buffers (minimum size 1 to avoid zero-size buffer) + let storage_rw = wgpu::BufferUsages::STORAGE + | wgpu::BufferUsages::COPY_DST + | wgpu::BufferUsages::COPY_SRC; + let storage_ro = wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST; + + Self { + item_infos_buffer: WgpuVecBuffer::new(ctx, Some("Merged ItemInfos"), storage_ro, 1), + planes_buffer: WgpuVecBuffer::new(ctx, Some("Merged Planes"), storage_ro, 1), + clip_boxes_buffer: WgpuVecBuffer::new(ctx, Some("Merged ClipBoxes"), storage_rw, 5), + points3d_buffer: WgpuVecBuffer::new(ctx, Some("Merged Points3D"), storage_ro, 1), + points2d_buffer: WgpuVecBuffer::new(ctx, Some("Merged Points2D"), storage_rw, 1), + fill_rgbas_buffer: WgpuVecBuffer::new(ctx, Some("Merged FillRgbas"), storage_ro, 1), + stroke_rgbas_buffer: WgpuVecBuffer::new(ctx, Some("Merged StrokeRgbas"), storage_ro, 1), + stroke_widths_buffer: WgpuVecBuffer::new( + ctx, + Some("Merged StrokeWidths"), + storage_ro, + 1, + ), + item_count: 0, + total_points: 0, + compute_bind_group: None, + render_bind_group: None, + } + } + + /// Pack all VItems into the merged buffers. Called once per frame. + pub fn update(&mut self, ctx: &WgpuContext, vitems: &[((usize, usize), VItem)]) { + if vitems.is_empty() { + self.item_count = 0; + self.total_points = 0; + return; + } + + let item_count = vitems.len(); + + // Pre-calculate total sizes + let total_points: usize = vitems.iter().map(|(_, v)| v.points.len()).sum(); + let total_attrs: usize = vitems.iter().map(|(_, v)| v.points.len().div_ceil(2)).sum(); + + // Build index table and collect data + let mut item_infos = Vec::with_capacity(item_count); + let mut planes = Vec::with_capacity(item_count); + let mut all_points3d = Vec::with_capacity(total_points); + let mut all_fill_rgbas = Vec::with_capacity(total_attrs); + let mut all_stroke_rgbas = Vec::with_capacity(total_attrs); + let mut all_stroke_widths = Vec::with_capacity(total_attrs); + + let mut point_offset: u32 = 0; + let mut attr_offset: u32 = 0; + + for (_, vitem) in vitems { + let pc = vitem.points.len() as u32; + let ac = pc.div_ceil(2); + + item_infos.push(ItemInfo { + point_offset, + point_count: pc, + attr_offset, + attr_count: ac, + }); + + planes.push(PlaneData { + origin: Vec4::from((vitem.origin, 0.0)), + basis_u: Vec4::from((vitem.basis.u().as_vec3(), 0.0)), + basis_v: Vec4::from((vitem.basis.v().as_vec3(), 0.0)), + }); + + all_points3d.extend_from_slice(&vitem.points); + all_fill_rgbas.extend_from_slice(&vitem.fill_rgbas); + all_stroke_rgbas.extend_from_slice(&vitem.stroke_rgbas); + all_stroke_widths.extend_from_slice(&vitem.stroke_widths); + + point_offset += pc; + attr_offset += ac; + } + + // Build clip_boxes initial values: [MAX, MIN, MAX, MIN, 0] per item + let mut clip_boxes = Vec::with_capacity(item_count * 5); + for _ in 0..item_count { + clip_boxes.extend_from_slice(&[i32::MAX, i32::MIN, i32::MAX, i32::MIN, 0]); + } + + // Points2d: zeroed, same size as points3d + let points2d = vec![Vec4::ZERO; total_points]; + + self.item_count = item_count as u32; + self.total_points = total_points as u32; + + // Upload all data — track if any buffer was reallocated + let mut any_realloc = false; + any_realloc |= self.item_infos_buffer.set(ctx, &item_infos); + any_realloc |= self.planes_buffer.set(ctx, &planes); + any_realloc |= self.clip_boxes_buffer.set(ctx, &clip_boxes); + any_realloc |= self.points3d_buffer.set(ctx, &all_points3d); + any_realloc |= self.points2d_buffer.set(ctx, &points2d); + any_realloc |= self.fill_rgbas_buffer.set(ctx, &all_fill_rgbas); + any_realloc |= self.stroke_rgbas_buffer.set(ctx, &all_stroke_rgbas); + any_realloc |= self.stroke_widths_buffer.set(ctx, &all_stroke_widths); + + // Recreate bind groups if any buffer was reallocated + if any_realloc || self.compute_bind_group.is_none() { + self.compute_bind_group = Some(Self::create_compute_bind_group(ctx, self)); + self.render_bind_group = Some(Self::create_render_bind_group(ctx, self)); + } + } + + pub fn item_count(&self) -> u32 { + self.item_count + } + + pub fn total_points(&self) -> u32 { + self.total_points + } + + // MARK: Bind group layouts + + pub fn compute_bind_group_layout(ctx: &WgpuContext) -> wgpu::BindGroupLayout { + ctx.device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("Merged VItem Compute BGL"), + entries: &[ + // binding 0: item_infos (read-only) + bgl_entry(0, wgpu::ShaderStages::COMPUTE, false), + // binding 1: planes (read-only) + bgl_entry(1, wgpu::ShaderStages::COMPUTE, false), + // binding 2: points3d (read-only) + bgl_entry(2, wgpu::ShaderStages::COMPUTE, false), + // binding 3: stroke_widths (read-only) + bgl_entry(3, wgpu::ShaderStages::COMPUTE, false), + // binding 4: points2d (read-write) + bgl_entry(4, wgpu::ShaderStages::COMPUTE, true), + // binding 5: clip_boxes (read-write) + bgl_entry(5, wgpu::ShaderStages::COMPUTE, true), + ], + }) + } + + pub fn render_bind_group_layout(ctx: &WgpuContext) -> wgpu::BindGroupLayout { + let vf = wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT; + let v = wgpu::ShaderStages::VERTEX; + ctx.device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("Merged VItem Render BGL"), + entries: &[ + // binding 0: item_infos + bgl_entry(0, vf, false), + // binding 1: planes + bgl_entry(1, v, false), + // binding 2: clip_boxes + bgl_entry(2, v, false), + // binding 3: points2d + bgl_entry(3, vf, false), + // binding 4: fill_rgbas + bgl_entry(4, vf, false), + // binding 5: stroke_rgbas + bgl_entry(5, vf, false), + // binding 6: stroke_widths + bgl_entry(6, vf, false), + ], + }) + } + + fn create_compute_bind_group(ctx: &WgpuContext, this: &Self) -> wgpu::BindGroup { + ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("Merged VItem Compute BG"), + layout: &Self::compute_bind_group_layout(ctx), + entries: &[ + bg_entry(0, &this.item_infos_buffer.buffer), + bg_entry(1, &this.planes_buffer.buffer), + bg_entry(2, &this.points3d_buffer.buffer), + bg_entry(3, &this.stroke_widths_buffer.buffer), + bg_entry(4, &this.points2d_buffer.buffer), + bg_entry(5, &this.clip_boxes_buffer.buffer), + ], + }) + } + + fn create_render_bind_group(ctx: &WgpuContext, this: &Self) -> wgpu::BindGroup { + ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("Merged VItem Render BG"), + layout: &Self::render_bind_group_layout(ctx), + entries: &[ + bg_entry(0, &this.item_infos_buffer.buffer), + bg_entry(1, &this.planes_buffer.buffer), + bg_entry(2, &this.clip_boxes_buffer.buffer), + bg_entry(3, &this.points2d_buffer.buffer), + bg_entry(4, &this.fill_rgbas_buffer.buffer), + bg_entry(5, &this.stroke_rgbas_buffer.buffer), + bg_entry(6, &this.stroke_widths_buffer.buffer), + ], + }) + } +} + +fn bgl_entry( + binding: u32, + visibility: wgpu::ShaderStages, + read_write: bool, +) -> wgpu::BindGroupLayoutEntry { + wgpu::BindGroupLayoutEntry { + binding, + visibility, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { + read_only: !read_write, + }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + } +} + +fn bg_entry(binding: u32, buffer: &wgpu::Buffer) -> wgpu::BindGroupEntry<'_> { + wgpu::BindGroupEntry { + binding, + resource: wgpu::BindingResource::Buffer(buffer.as_entire_buffer_binding()), + } +} diff --git a/packages/ranim-render/src/primitives/vitem.rs b/packages/ranim-render/src/primitives/vitem.rs index f5ab1bf1..6965eb60 100644 --- a/packages/ranim-render/src/primitives/vitem.rs +++ b/packages/ranim-render/src/primitives/vitem.rs @@ -330,4 +330,94 @@ mod tests { .save("../../output/vitem2d_intersecting_perspective_depth.png") .unwrap(); } + + /// Render the same scene with the merged buffer path for visual comparison. + #[test] + fn render_merged_vitem2d_primitive() { + let ctx = pollster::block_on(WgpuContext::new()); + let mut renderer = Renderer::new(&ctx, 1280, 720, 8); + let mut render_textures = renderer.new_render_textures(&ctx); + let clear_color = wgpu::Color { + r: 0.8, + g: 0.8, + b: 0.8, + a: 1.0, + }; + + let mut camera = CameraFrame::new(); + camera.pos = DVec3::new(3.0, 3.0, 3.0); + camera.facing = DVec3::new(-1.0, -1.0, -1.0).normalize(); + camera.up = DVec3::Y; + camera.perspective_blend = 1.0; + + let scale = 2.0; + let mut points = vec![ + Vec4::new(-1.0, -1.0, 0.0, 1.0), + Vec4::new(-1.0, 0.0, 0.0, 1.0), + Vec4::new(-1.0, 1.0, 0.0, 1.0), + Vec4::new(0.0, 1.0, 0.0, 1.0), + Vec4::new(1.0, 1.0, 0.0, 1.0), + Vec4::new(1.0, 0.0, 0.0, 1.0), + Vec4::new(1.0, -1.0, 0.0, 1.0), + Vec4::new(0.0, -1.0, 0.0, 1.0), + Vec4::new(-1.0, -1.0, 0.0, 1.0), + ]; + let n = points.len().div_ceil(2); + points.iter_mut().for_each(|p| { + p.x *= scale; + p.y *= scale; + }); + + let make_items = |origin: Vec3, alpha: f32| { + let item1 = VItem { + origin, + basis: Basis2d::XY, + points: points.clone(), + fill_rgbas: vec![Rgba(vec4(1.0, 0.0, 0.0, alpha)); n], + stroke_rgbas: vec![Rgba(vec4(0.5, 0.0, 0.0, 1.0)); n], + stroke_widths: vec![Width(0.02); n], + }; + let item2 = VItem { + origin, + basis: Basis2d::YZ, + points: points.clone(), + fill_rgbas: vec![Rgba(vec4(0.0, 1.0, 0.0, alpha)); n], + stroke_rgbas: vec![Rgba(vec4(0.0, 0.5, 0.0, 1.0)); n], + stroke_widths: vec![Width(0.02); n], + }; + let item3 = VItem { + origin, + basis: Basis2d::XZ, + points: points.clone(), + fill_rgbas: vec![Rgba(vec4(0.0, 0.0, 1.0, alpha)); n], + stroke_rgbas: vec![Rgba(vec4(0.0, 0.0, 0.5, 1.0)); n], + stroke_widths: vec![Width(0.02); n], + }; + std::iter::once(item1) + .chain(std::iter::once(item2)) + .chain(std::iter::once(item3)) + }; + + let mut pool = RenderPool::new(); + let mut store = CoreItemStore::new(); + let center = Vec3::ZERO; + let dir = (Vec3::X + Vec3::NEG_Z).normalize(); + store.update( + make_items(-scale * 1.5 * dir + center, 1.0) + .chain(make_items(scale * 1.5 * dir + center, 0.5)) + .map(CoreItem::VItem) + .chain(std::iter::once(CoreItem::CameraFrame(camera))) + .enumerate() + .map(|(id, x)| ((id, id), x)), + ); + + renderer.render_store_with_pool(&ctx, &mut render_textures, clear_color, &store, &mut pool); + ctx.device + .poll(wgpu::PollType::wait_indefinitely()) + .unwrap(); + + let img = render_textures.get_rendered_texture_img_buffer(&ctx); + img.save("../../output/merged_vitem2d_intersecting_perspective.png") + .unwrap(); + } }