Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions benches/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ bench = false
itertools.workspace = true
criterion = { version = "0.8.1", features = ["html_reports"] }
ranim = { path = "../", features = ["render"] }
ranim-core = { path = "../packages/ranim-core" }
ranim-render = { path = "../packages/ranim-render" }
wgpu = { workspace = true }
pollster = "0.4.0"

[[bench]]
name = "eval"
Expand All @@ -25,3 +29,7 @@ harness = false
[[bench]]
name = "extract"
harness = false

[[bench]]
name = "gpu_render"
harness = false
243 changes: 243 additions & 0 deletions benches/benches/gpu_render.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
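//! GPU rendering benchmark — isolates the cost of `render_store_with_pool`.
//!
//! Measures:
//! - Full frame time: submission followed by a blocking `device.poll`, so GPU execution is included
//! - CPU-side submission time only (buffer upload + command encoding + queue submit), with no GPU wait
//!
//! Each benchmark is repeated over several VItem counts so that scaling can be used
//! to identify the bottleneck (draw calls vs SDF vs upload).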

use std::hint::black_box;

use benches::test_scenes::static_squares;
use criterion::{BenchmarkId, Criterion, SamplingMode, criterion_group, criterion_main};
use ranim::{SceneConstructor, prelude::*};
use ranim_core::store::CoreItemStore;
use ranim_render::{Renderer, resource::RenderPool, utils::WgpuContext};

/// End-to-end render benchmark (`gpu_render` group).
///
/// Every timed iteration performs one `render_store_with_pool` call, `pool.clean()`,
/// and a blocking `device.poll`, so the sample covers submission plus the wait for
/// the GPU to finish.
///
/// One case is registered per grid size in `[5, 10, 20, 40, 60]`; the size is passed
/// to `static_squares` and the case is labelled `submit/<size * size>`.
fn gpu_render_benchmark(c: &mut Criterion) {
    let wgpu_ctx = pollster::block_on(WgpuContext::new());
    // Same background for every case; `wgpu::Color` is passed by value to the renderer.
    let background = wgpu::Color {
        r: 0.2,
        g: 0.2,
        b: 0.2,
        a: 1.0,
    };

    let mut group = c.benchmark_group("gpu_render");
    group.sampling_mode(SamplingMode::Flat).sample_size(50);

    for &side in [5, 10, 20, 40, 60].iter() {
        let vitem_count = side * side;

        // Build the scene and evaluate it at the midpoint to obtain the store to render.
        let scene = (move |r: &mut RanimScene| static_squares(r, side)).build_scene();
        let mut store = CoreItemStore::new();
        store.update(scene.eval_at_alpha(0.5));

        let mut renderer = Renderer::new(&wgpu_ctx, 1920, 1080, 8);
        let mut targets = renderer.new_render_textures(&wgpu_ctx);
        let mut pool = RenderPool::new();

        // One full frame: submit, recycle the pool, then block until the device is idle.
        let mut render_frame = || {
            renderer.render_store_with_pool(&wgpu_ctx, &mut targets, background, &store, &mut pool);
            pool.clean();
            wgpu_ctx
                .device
                .poll(wgpu::PollType::wait_indefinitely())
                .unwrap();
        };

        // Warm-up frame, run once outside the timed region before measuring.
        render_frame();

        group.bench_with_input(
            BenchmarkId::new("submit", vitem_count),
            &vitem_count,
            |b, _| {
                b.iter(|| {
                    render_frame();
                    black_box(());
                });
            },
        );
    }

    group.finish();
}

/// CPU-side submission benchmark (`cpu_submit` group).
///
/// Every timed iteration performs one `render_store_with_pool` call followed by
/// `pool.clean()` and deliberately does NOT wait for the GPU, so the sample covers
/// only the work done on the calling thread.
///
/// One case is registered per grid size in `[5, 10, 20, 40, 60]`; the size is passed
/// to `static_squares` and the case is labelled `no_wait/<size * size>`.
///
/// Because the timed loop never waits, frames submitted by it may still be queued when
/// the measurement ends. The device is therefore polled to completion after each case,
/// outside the timed region, so leftover work does not overlap the next case's setup
/// or the teardown of this case's GPU objects.
fn cpu_submit_benchmark(c: &mut Criterion) {
    let ctx = pollster::block_on(WgpuContext::new());

    let mut group = c.benchmark_group("cpu_submit");
    group.sampling_mode(SamplingMode::Flat).sample_size(50);

    for &side in [5, 10, 20, 40, 60].iter() {
        let vitem_count = side * side;

        // Build the scene and evaluate it at the midpoint to obtain the store to render.
        let scene = (move |r: &mut RanimScene| static_squares(r, side)).build_scene();
        let mut store = CoreItemStore::new();
        store.update(scene.eval_at_alpha(0.5));

        let mut renderer = Renderer::new(&ctx, 1920, 1080, 8);
        let mut render_textures = renderer.new_render_textures(&ctx);
        let mut pool = RenderPool::new();
        let clear_color = wgpu::Color {
            r: 0.2,
            g: 0.2,
            b: 0.2,
            a: 1.0,
        };

        // Submission only: this is exactly what the timed loop measures.
        let mut submit_frame = || {
            renderer.render_store_with_pool(
                &ctx,
                &mut render_textures,
                clear_color,
                &store,
                &mut pool,
            );
            pool.clean();
        };

        // Warm up: submit one frame and wait for it before measuring.
        submit_frame();
        ctx.device
            .poll(wgpu::PollType::wait_indefinitely())
            .unwrap();

        group.bench_with_input(
            BenchmarkId::new("no_wait", vitem_count),
            &vitem_count,
            |b, _| {
                b.iter(|| {
                    // Don't wait — measures pure CPU submission overhead
                    submit_frame();
                    black_box(());
                });
            },
        );

        // Drain whatever the no-wait loop left queued before moving on.
        ctx.device
            .poll(wgpu::PollType::wait_indefinitely())
            .unwrap();
    }

    group.finish();
}

/// Merged buffer path: end-to-end render benchmark (`merged_gpu_render` group).
///
/// Every timed iteration performs one `render_store_with_pool` call, `pool.clean()`,
/// and a blocking `device.poll`, so the sample includes the wait for the GPU.
///
/// One case is registered per grid size in `[5, 10, 20, 40, 60]`, labelled
/// `submit/<size * size>`.
///
/// NOTE(review): apart from the group name this body performs the same calls as
/// `gpu_render_benchmark`; nothing visible here selects a merged-buffer path.
/// Presumably the renderer takes that path on its own — confirm.
fn merged_gpu_render_benchmark(c: &mut Criterion) {
    let wgpu_ctx = pollster::block_on(WgpuContext::new());
    // Same background for every case; `wgpu::Color` is passed by value to the renderer.
    let background = wgpu::Color {
        r: 0.2,
        g: 0.2,
        b: 0.2,
        a: 1.0,
    };

    let mut group = c.benchmark_group("merged_gpu_render");
    group.sampling_mode(SamplingMode::Flat).sample_size(50);

    for &side in [5, 10, 20, 40, 60].iter() {
        let vitem_count = side * side;

        // Build the scene and evaluate it at the midpoint to obtain the store to render.
        let scene = (move |r: &mut RanimScene| static_squares(r, side)).build_scene();
        let mut store = CoreItemStore::new();
        store.update(scene.eval_at_alpha(0.5));

        let mut renderer = Renderer::new(&wgpu_ctx, 1920, 1080, 8);
        let mut targets = renderer.new_render_textures(&wgpu_ctx);
        let mut pool = RenderPool::new();

        // One full frame: submit, recycle the pool, then block until the device is idle.
        let mut render_frame = || {
            renderer.render_store_with_pool(&wgpu_ctx, &mut targets, background, &store, &mut pool);
            pool.clean();
            wgpu_ctx
                .device
                .poll(wgpu::PollType::wait_indefinitely())
                .unwrap();
        };

        // Warm-up frame, run once outside the timed region before measuring.
        render_frame();

        group.bench_with_input(
            BenchmarkId::new("submit", vitem_count),
            &vitem_count,
            |b, _| {
                b.iter(|| {
                    render_frame();
                    black_box(());
                });
            },
        );
    }

    group.finish();
}

/// Merged buffer path: CPU-only submission benchmark (`merged_cpu_submit` group).
///
/// Every timed iteration performs one `render_store_with_pool` call followed by
/// `pool.clean()` and does NOT wait for the GPU.
///
/// One case is registered per grid size in `[5, 10, 20, 40, 60]`, labelled
/// `no_wait/<size * size>`.
///
/// Because the timed loop never waits, frames submitted by it may still be queued when
/// the measurement ends. The device is therefore polled to completion after each case,
/// outside the timed region, so leftover work does not overlap the next case's setup
/// or the teardown of this case's GPU objects.
///
/// NOTE(review): apart from the group name this body performs the same calls as
/// `cpu_submit_benchmark`; nothing visible here selects a merged-buffer path.
/// Presumably the renderer takes that path on its own — confirm.
fn merged_cpu_submit_benchmark(c: &mut Criterion) {
    let ctx = pollster::block_on(WgpuContext::new());

    let mut group = c.benchmark_group("merged_cpu_submit");
    group.sampling_mode(SamplingMode::Flat).sample_size(50);

    for &side in [5, 10, 20, 40, 60].iter() {
        let vitem_count = side * side;

        // Build the scene and evaluate it at the midpoint to obtain the store to render.
        let scene = (move |r: &mut RanimScene| static_squares(r, side)).build_scene();
        let mut store = CoreItemStore::new();
        store.update(scene.eval_at_alpha(0.5));

        let mut renderer = Renderer::new(&ctx, 1920, 1080, 8);
        let mut render_textures = renderer.new_render_textures(&ctx);
        let mut pool = RenderPool::new();
        let clear_color = wgpu::Color {
            r: 0.2,
            g: 0.2,
            b: 0.2,
            a: 1.0,
        };

        // Submission only: this is exactly what the timed loop measures.
        let mut submit_frame = || {
            renderer.render_store_with_pool(
                &ctx,
                &mut render_textures,
                clear_color,
                &store,
                &mut pool,
            );
            pool.clean();
        };

        // Warm up: submit one frame and wait for it before measuring.
        submit_frame();
        ctx.device
            .poll(wgpu::PollType::wait_indefinitely())
            .unwrap();

        group.bench_with_input(
            BenchmarkId::new("no_wait", vitem_count),
            &vitem_count,
            |b, _| {
                b.iter(|| {
                    submit_frame();
                    black_box(());
                });
            },
        );

        // Drain whatever the no-wait loop left queued before moving on.
        ctx.device
            .poll(wgpu::PollType::wait_indefinitely())
            .unwrap();
    }

    group.finish();
}

// Collect the four benchmark functions of this file into the `benches` group,
// listed in the order they are defined above.
criterion_group!(
benches,
gpu_render_benchmark,
cpu_submit_benchmark,
merged_gpu_render_benchmark,
merged_cpu_submit_benchmark
);
// Entry point for this bench target (its `[[bench]]` entry sets `harness = false`).
criterion_main!(benches);
7 changes: 7 additions & 0 deletions packages/ranim-render/src/graph/view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ pub use vitem_depth::*;
pub mod oit_resolve;
pub use oit_resolve::*;

pub mod merged_vitem_compute;
pub use merged_vitem_compute::*;
pub mod merged_vitem_depth;
pub use merged_vitem_depth::*;
pub mod merged_vitem_color;
pub use merged_vitem_color::*;

use crate::{
RenderContext,
graph::{GlobalRenderNodeTrait, RenderPacketsQuery},
Expand Down
68 changes: 68 additions & 0 deletions packages/ranim-render/src/graph/view/merged_vitem_color.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
use crate::{
RenderContext, RenderTextures,
graph::{RenderPacketsQuery, view::ViewRenderNodeTrait},
pipelines::MergedVItemColorPipeline,
primitives::viewport::ViewportGpuPacket,
};

/// Stateless view render node for the merged VItem color pass.
///
/// All behavior lives in its `ViewRenderNodeTrait` implementation.
pub struct MergedVItemColorNode;

impl ViewRenderNodeTrait for MergedVItemColorNode {
    /// This node consumes no render packets.
    type Query = ();

    /// Records the merged VItem color render pass into `encoder`.
    ///
    /// Returns without recording anything when `ctx.merged_buffer` is `None` or when
    /// the merged buffer reports zero items. Otherwise a single pass is opened that
    /// loads (rather than clears) the existing color and depth contents, binds the
    /// resolution, viewport, and merged-buffer bind groups at slots 0, 1, and 2, and
    /// issues one instanced draw: 4 vertices per instance, one instance per item.
    ///
    /// # Panics
    ///
    /// Panics if `merged.render_bind_group` is `None` while the item count is non-zero.
    fn run(
        &self,
        #[cfg(not(feature = "profiling"))] encoder: &mut wgpu::CommandEncoder,
        #[cfg(feature = "profiling")] encoder: &mut wgpu_profiler::Scope<'_, wgpu::CommandEncoder>,
        _packets: <Self::Query as RenderPacketsQuery>::Output<'_>,
        ctx: RenderContext,
        viewport: &ViewportGpuPacket,
    ) {
        // Shared by the pass descriptor and, when profiling, the profiler scope.
        const PASS_LABEL: &str = "Merged VItem Color Render Pass";

        let Some(merged) = ctx.merged_buffer else {
            return;
        };
        let instance_count = merged.item_count();
        if instance_count == 0 {
            return;
        }

        let RenderTextures {
            render_view: color_view,
            depth_stencil_view: depth_view,
            ..
        } = ctx.render_textures;

        // Both attachments use Load/Store, so earlier contents are kept and results persist.
        let color_attachments = [Some(wgpu::RenderPassColorAttachment {
            view: color_view,
            resolve_target: None,
            depth_slice: None,
            ops: wgpu::Operations {
                load: wgpu::LoadOp::Load,
                store: wgpu::StoreOp::Store,
            },
        })];
        let depth_attachment = wgpu::RenderPassDepthStencilAttachment {
            view: depth_view,
            depth_ops: Some(wgpu::Operations {
                load: wgpu::LoadOp::Load,
                store: wgpu::StoreOp::Store,
            }),
            stencil_ops: None,
        };
        let rpass_desc = wgpu::RenderPassDescriptor {
            label: Some(PASS_LABEL),
            color_attachments: &color_attachments,
            depth_stencil_attachment: Some(depth_attachment),
            timestamp_writes: None,
            occlusion_query_set: None,
        };

        #[cfg(feature = "profiling")]
        let mut rpass = encoder.scoped_render_pass(PASS_LABEL, rpass_desc);
        #[cfg(not(feature = "profiling"))]
        let mut rpass = encoder.begin_render_pass(&rpass_desc);

        rpass.set_pipeline(
            &ctx.pipelines
                .get_or_init::<MergedVItemColorPipeline>(ctx.wgpu_ctx),
        );
        rpass.set_bind_group(0, &ctx.resolution_info.bind_group, &[]);
        rpass.set_bind_group(1, &viewport.uniforms_bind_group.bind_group, &[]);
        rpass.set_bind_group(2, merged.render_bind_group.as_ref().unwrap(), &[]);
        rpass.draw(0..4, 0..instance_count);
    }
}
Loading