use super::CpuAggregate; use crate::globals::get_options; use crate::lights::sampler::create_light_sampler; use crate::Arena; use crate::ParameterDictionary; use crate::PbrtProgress; use rayon::prelude::*; use shared::core::bxdf::{FArgs, TransportMode}; use shared::core::camera::{Camera, CameraTrait}; use shared::core::film::VisibleSurface; use shared::core::filter::{Filter, FilterTrait}; use shared::core::geometry::{ Bounds2i, Point2f, Point2i, Point3f, Point3fi, Ray, RayDifferential, Vector2f, Vector3f, VectorLike, }; use shared::core::interaction::InteractionTrait; use shared::core::light::{Light, LightSampleContext, LightTrait}; use shared::core::material::{MaterialEvalContext, MaterialTrait}; use shared::core::primitive::Primitive; use shared::core::sampler::{get_camera_sample, CameraSample, Sampler, SamplerTrait}; use shared::core::texture::{TextureEvalContext, UniversalTextureEvaluator}; use shared::lights::sampler::{LightSampler, LightSamplerTrait}; use shared::spectra::{SampledSpectrum, SampledWavelengths}; use shared::utils::math::square; use shared::utils::sampling::power_heuristic; use shared::utils::soa::{SoA, SoAAllocator, WorkQueue}; use shared::wavefront::workitems::*; use shared::wavefront::{WavefrontAggregate, WavefrontPathIntegrator, WavefrontRenderer}; use shared::{gvec, Ptr}; use std::ops::{Deref, DerefMut}; use std::sync::Arc; pub struct CpuWavefrontRenderer(pub WavefrontPathIntegrator); impl Deref for CpuWavefrontRenderer { type Target = WavefrontPathIntegrator; fn deref(&self) -> &Self::Target { &self.0 } } impl DerefMut for CpuWavefrontRenderer { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } } pub trait CreateWavefront where Self: Sized, { fn create( parameters: ParameterDictionary, camera: Arc, sampler: Arc, aggregate: Arc, lights: Vec>, arena: &Arena, ) -> CpuWavefrontRenderer { let max_depth = parameters .get_one_int("maxdepth", 5) .expect("Could not obtain depth value"); let regularize = parameters .get_one_bool("regularize", false) .expect("Could not obtain regularize flag value"); let spp = sampler.samples_per_pixel() as u32; let film = camera.base().film; let pixel_bounds = film.pixel_bounds(); let filter = Ptr::from(&film.base().filter); let light_sampler = create_light_sampler("power", &lights, arena); let res_x = pixel_bounds.diagonal().x() as u32; let max_samples = 1024u32 * 1024; let scanlines_per_pass = (max_samples / res_x).max(1); let max_queue_size = res_x * scanlines_per_pass; let mut infinite_lights = gvec(); for light in &lights { if light.light_type().is_infinite() { infinite_lights.push(arena.alloc(**light)); } } // for light in let cpu_aggregate = CpuAggregate::new(*aggregate); CpuWavefrontRenderer(WavefrontPathIntegrator { aggregate: cpu_aggregate, camera: (*camera).clone(), sampler: (*sampler).clone(), max_depth: max_depth.try_into().unwrap(), film, filter, samples_per_pixel: spp, regularize, infinite_lights, max_queue_size, scanlines_per_pass, light_sampler, ray_queues: [ WorkQueue::new( RayWorkItemSoA::allocate(max_queue_size, arena), max_queue_size, ), WorkQueue::new( RayWorkItemSoA::allocate(max_queue_size, arena), max_queue_size, ), ], shadow_ray_queue: WorkQueue::new( ShadowRayWorkItemSoA::allocate(max_queue_size, arena), max_queue_size, ), escaped_ray_queue: WorkQueue::new( EscapedRayWorkItemSoA::allocate(max_queue_size, arena), max_queue_size, ), hit_area_light_queue: WorkQueue::new( HitAreaLightWorkItemSoA::allocate(max_queue_size, arena), max_queue_size, ), basic_eval_material_queue: WorkQueue::new( MaterialEvalWorkItemSoA::allocate(max_queue_size, arena), max_queue_size, ), universal_eval_material_queue: WorkQueue::new( MaterialEvalWorkItemSoA::allocate(max_queue_size, arena), max_queue_size, ), pixel_sample_state: PixelSampleState::allocate(max_queue_size, arena), }) } } impl CreateWavefront for CpuWavefrontRenderer {} impl CpuWavefrontRenderer { pub fn render(&mut self) { let film = self.camera.get_film(); let filter = film.get_filter(); let pixel_bounds = film.pixel_bounds(); let resolution = pixel_bounds.diagonal(); let total_work = (pixel_bounds.area() as u64) * (self.samples_per_pixel as u64); let options = get_options(); let progress = PbrtProgress::new(total_work, "Rendering", options.quiet); for sample_index in 0..self.samples_per_pixel { let mut y0 = pixel_bounds.p_min.y(); while y0 < pixel_bounds.p_max.y() { let y1 = (y0 + self.scanlines_per_pass as i32).min(pixel_bounds.p_max.y()); // Reset the primary ray queue for this set self.ray_queues[0].reset(); self.generate_camera_rays(y0, y1, sample_index, &pixel_bounds); for depth in 0..=self.max_depth { let current = (depth % 2) as usize; let next = ((depth + 1) % 2) as usize; // Reset queues before tracing next batch of rays self.ray_queues[next].reset(); self.escaped_ray_queue.reset(); self.hit_area_light_queue.reset(); self.basic_eval_material_queue.reset(); self.universal_eval_material_queue.reset(); self.shadow_ray_queue.reset(); if self.ray_queues[current].size() == 0 { break; } self.generate_ray_samples(depth, sample_index); self.aggregate.intersect_closest( self.max_queue_size as usize, &self.ray_queues[current], &self.escaped_ray_queue, &self.hit_area_light_queue, &self.basic_eval_material_queue, &self.universal_eval_material_queue, &self.ray_queues[next], &self.pixel_sample_state, ); self.handle_escaped_rays(); self.handle_emissive_intersections(); if depth == self.max_depth { break; } self.evaluate_materials_and_bsdfs(depth); self.aggregate.intersect_shadow( self.max_queue_size as usize, &self.shadow_ray_queue, &self.pixel_sample_state, ); } self.update_film(y0, y1, &pixel_bounds); let batch_pixels = ((y1 - y0) * (pixel_bounds.p_max.x() - pixel_bounds.p_min.x())) as u64; progress.update(batch_pixels); y0 = y1; } } } fn generate_camera_rays( &mut self, y0: i32, y1: i32, sample_index: u32, pixel_bounds: &Bounds2i, ) { let filter = self.filter; let film = self.film; let camera = &self.camera; let sampler_proto = &self.sampler; let pixel_sample_state = &self.pixel_sample_state; let ray_queue = &self.ray_queues[0]; let x_resolution = pixel_bounds.p_max.x() - pixel_bounds.p_min.x(); // Iterate the whole queue, exactly like pbrt's ParallelFor(maxQueueSize). // The loop index IS the pixelSampleState key; pPixel is derived from it, // and every later kernel addresses state by this same absolute index. (0..self.max_queue_size as usize) .into_par_iter() .for_each(|pixel_index| { let p_pixel = Point2i::new( pixel_bounds.p_min.x() + (pixel_index as i32 % x_resolution), y0 + (pixel_index as i32 / x_resolution), ); pixel_sample_state.p_pixel.set(pixel_index, p_pixel); // Skipped pixels contribute nothing; their slots are simply never // populated, and update_film filters them by the same bounds test. if !pixel_bounds.contains_exclusive(p_pixel) { return; } let mut sampler = sampler_proto.clone(); sampler.start_pixel_sample(p_pixel, sample_index as i32, Some(0)); let lu = sampler.get1d(); let lambda = film.sample_wavelengths(lu); let camera_sample = get_camera_sample(&mut sampler, p_pixel, &filter); pixel_sample_state .l .set(pixel_index, SampledSpectrum::new(0.0)); pixel_sample_state.lambda.set(pixel_index, lambda); pixel_sample_state .filter_weight .set(pixel_index, camera_sample.filter_weight); pixel_sample_state .p_film .set(pixel_index, camera_sample.p_film); let Some(camera_ray) = camera.generate_ray(camera_sample, &lambda) else { pixel_sample_state .camera_ray_weight .set(pixel_index, SampledSpectrum::new(0.0)); return; }; pixel_sample_state .camera_ray_weight .set(pixel_index, camera_ray.weight); ray_queue.push(RayWorkItem { ray: camera_ray.ray, depth: 0, pixel_index: pixel_index as u32, lambda, beta: SampledSpectrum::new(1.0), r_u: SampledSpectrum::new(1.0), r_l: SampledSpectrum::new(1.0), prev_intr_ctx: LightSampleContext::default(), eta_scale: 1.0, specular_bounce: 0, any_non_specular_bounces: 0, }); }); } /// Evaluate infinite lights. fn handle_escaped_rays(&self) { let n = self.escaped_ray_queue.size(); let infinite_lights = &self.infinite_lights; let light_sampler = &self.light_sampler; let pixel_sample_state = &self.pixel_sample_state; let escaped_ray_queue = &self.escaped_ray_queue; (0..n as usize).into_par_iter().for_each(|i| { let w = unsafe { escaped_ray_queue.storage.get(i) }; let mut l_contrib = SampledSpectrum::new(0.0); for light_ptr in infinite_lights { let light = light_ptr.get().unwrap(); let ray = Ray::new(w.ray_o, w.ray_d, None, Ptr::null()); let le = light.le(&ray, &w.lambda); if le.is_black() { continue; } if w.depth == 0 || w.specular_bounce { l_contrib += w.beta * le / w.r_u.average(); } else { // MIS: combine BSDF and light sampling weights via ratio tracking let ctx = w.prev_intr_ctx; let light_choice_pdf = light_sampler.pmf_with_context(&ctx, light); let r_l = w.r_l * light_choice_pdf * light.pdf_li(&ctx, w.ray_d, true); l_contrib += w.beta * le / (w.r_u + r_l).average(); } } if !l_contrib.is_black() { let pi = w.pixel_index as usize; let mut l = pixel_sample_state.l.get(pi); l += l_contrib; pixel_sample_state.l.set(pi, l); } }); } fn handle_emissive_intersections(&self) { let n = self.hit_area_light_queue.size(); let light_sampler = &self.light_sampler; let pixel_sample_state = &self.pixel_sample_state; let hit_area_light_queue = &self.hit_area_light_queue; (0..n as usize).into_par_iter().for_each(|i| { let w = unsafe { hit_area_light_queue.storage.get(i) }; let light = w.area_light.get().unwrap(); let le = light.l(w.p, w.n, w.uv, w.wo, &w.lambda); if le.is_black() { return; } let l_contrib = if w.depth == 0 || w.specular_bounce { w.beta * le / w.r_u.average() } else { let ctx = w.prev_intr_ctx; let light_choice_pdf = light_sampler.pmf_with_context(&ctx, light); // wi from previous interaction to this light hit let wi = (w.p - Point3f::from(ctx.pi)).normalize(); let light_pdf = light_choice_pdf * light.pdf_li(&ctx, wi, true); let r_l = w.r_l * light_pdf; w.beta * le / (w.r_u + r_l).average() }; if !l_contrib.is_black() { let pi = w.pixel_index as usize; let mut l = pixel_sample_state.l.get(pi); l += l_contrib; pixel_sample_state.l.set(pi, l); } }); } fn evaluate_materials_and_bsdfs(&mut self, depth: u32) { self.evaluate_material_queue_impl(depth, false); self.evaluate_material_queue_impl(depth, true); } fn evaluate_material_queue_impl(&mut self, depth: u32, use_universal: bool) { let queue = if use_universal { &self.universal_eval_material_queue } else { &self.basic_eval_material_queue }; let n = queue.size(); let next = ((depth + 1) % 2) as usize; let pixel_sample_state = &self.pixel_sample_state; let light_sampler = &self.light_sampler; let shadow_ray_queue = &self.shadow_ray_queue; let next_ray_queue = &self.ray_queues[next]; let regularize = self.regularize; (0..n as usize).into_par_iter().for_each(|i| { let w = unsafe { queue.storage.get(i) }; let pi = w.pixel_index as usize; let rs = pixel_sample_state.samples.get(pi); let Some(material) = w.material.get() else { return; }; let tex_eval = UniversalTextureEvaluator; let ctx = MaterialEvalContext { texture: TextureEvalContext { p: w.p.into(), dpdx: Vector3f::zero(), dpdy: Vector3f::zero(), n: w.n, uv: w.uv, dudx: 0.0, dudy: 0.0, dvdx: 0.0, dvdy: 0.0, face_index: w.face_index, }, wo: w.wo, ns: w.ns, dpdus: w.dpdus, }; let lambda = w.lambda; let mut bsdf = material.get_bsdf(&tex_eval, &ctx, &lambda); if bsdf.flags().is_empty() { return; } if regularize && w.any_non_specular_bounces { bsdf.regularize(); } // BSDF sample for indirect ray let wo = w.wo; let ns = w.ns; if let Some(bs) = bsdf.sample_f(wo, rs.indirect.uc, rs.indirect.u, FArgs::default()) { let wi = bs.wi; let mut beta = w.beta * bs.f * wi.abs_dot(ns.into()) / bs.pdf; let r_u = w.r_u; let r_l = if bs.pdf_is_proportional { r_u / bsdf.pdf(wo, wi, FArgs::default()) } else { r_u / bs.pdf }; let mut eta_scale = w.eta_scale; if bs.is_transmissive() { eta_scale *= square(bs.eta); } let rr_beta = (beta * eta_scale / r_u.average()).max_component_value(); if rr_beta < 1.0 && w.depth > 1 { let q = (1.0 - rr_beta).max(0.0_f32); if rs.indirect.rr < q { beta = SampledSpectrum::new(0.0); } else { beta /= 1.0 - q; } } if !beta.is_black() { let ray = Ray::spawn(&w.p, &w.n, w.time, wi); let any_non_specular = !bs.is_specular() || w.any_non_specular_bounces; let ctx = LightSampleContext { pi: w.p, n: w.n, ns, }; next_ray_queue.push(RayWorkItem { ray, depth: w.depth + 1, pixel_index: w.pixel_index, lambda, beta, r_u, r_l, prev_intr_ctx: ctx, eta_scale, specular_bounce: bs.is_specular() as u8, any_non_specular_bounces: any_non_specular as u8, }); } } // Direct lighting let flags = bsdf.flags(); if flags.is_non_specular() { let light_ctx = LightSampleContext { pi: w.p, n: w.n, ns, }; if let Some(sampled_light) = light_sampler.sample_with_context(&light_ctx, rs.direct.uc) { if let Some(ls) = sampled_light .light .sample_li(&light_ctx, rs.direct.u, &lambda, true) { if !ls.l.is_black() && ls.pdf > 0.0 { let wi = ls.wi; if let Some(f) = bsdf.f(wo, wi, TransportMode::Radiance) { if !f.is_black() { let beta = w.beta * f * wi.abs_dot(ns.into()); let light_pdf = ls.pdf * sampled_light.p; let bsdf_pdf = if sampled_light.light.light_type().is_delta_light() { 0.0 } else { bsdf.pdf(wo, wi, FArgs::default()) }; let r_u = w.r_u * bsdf_pdf; let r_l = w.r_u * light_pdf; let ld = beta * ls.l; let ray_o = Ray::spawn_to_interaction( &w.p, &w.n, w.time, &ls.p_light.pi(), &ls.p_light.n(), ); let t_max = (1.0 - 1e-4) * (Point3f::from(ls.p_light.p()) - ray_o.o).norm() / wi.norm(); shadow_ray_queue.push(ShadowRayWorkItem { ray_o: ray_o.o, ray_d: ray_o.d, ray_time: w.time, t_max: 1.0 - 1e-4, lambda, l_d: ld, r_u, r_l, pixel_index: w.pixel_index, }); } } } } } } }); } fn update_film(&self, _y0: i32, _y1: i32, pixel_bounds: &Bounds2i) { (0..self.max_queue_size as usize) .into_par_iter() .for_each(|pixel_index| { let p_pixel = self.pixel_sample_state.p_pixel.get(pixel_index); if !pixel_bounds.contains_exclusive(p_pixel) { return; } let l = self.pixel_sample_state.l.get(pixel_index); let camera_weight = self.pixel_sample_state.camera_ray_weight.get(pixel_index); let weighted_l = l * camera_weight; let lambda = self.pixel_sample_state.lambda.get(pixel_index); let filter_weight = self.pixel_sample_state.filter_weight.get(pixel_index); self.film .add_sample(p_pixel, weighted_l, &lambda, None, filter_weight); }); } fn generate_ray_samples(&mut self, depth: u32, sample_index: u32) { let current = (depth % 2) as usize; let ray_queue = &self.ray_queues[current]; let n = ray_queue.size(); let dimension = 6 + 7 * depth; let pixel_sample_state = &self.pixel_sample_state; let sampler_proto = &self.sampler; (0..n as usize).into_par_iter().for_each(|i| { let w = unsafe { ray_queue.storage.get(i) }; let pi = w.pixel_index as usize; let p_pixel = pixel_sample_state.p_pixel.get(pi); let mut sampler = sampler_proto.clone(); sampler.start_pixel_sample(p_pixel, sample_index as i32, Some(dimension)); self.pixel_sample_state.samples.set( pi, RaySamples { direct: DirectSamples { uc: sampler.get1d(), u: sampler.get2d(), }, indirect: IndirectSamples { uc: sampler.get1d(), u: sampler.get2d(), rr: sampler.get1d(), }, }, ); }); } }