From a6ee0a1b5232dd01be78c462d6c1e7992311e79e Mon Sep 17 00:00:00 2001 From: Wito Wiala Date: Thu, 28 May 2026 06:39:26 +0100 Subject: [PATCH] Moving on to GPU rendering --- kernels/src/intersect.rs | 513 +++++++++++++++++++++++++++++ shared/src/utils/atomic.rs | 263 +++++++++++++++ shared/src/wavefront/aggregate.rs | 216 ++++++++++++ shared/src/wavefront/integrator.rs | 483 +++++++++++++++++++++++++++ shared/src/wavefront/mod.rs | 7 + shared/src/wavefront/workitems.rs | 507 ++++++++++++++++++++++++++++ 6 files changed, 1989 insertions(+) create mode 100644 kernels/src/intersect.rs create mode 100644 shared/src/utils/atomic.rs create mode 100644 shared/src/wavefront/aggregate.rs create mode 100644 shared/src/wavefront/integrator.rs create mode 100644 shared/src/wavefront/mod.rs create mode 100644 shared/src/wavefront/workitems.rs diff --git a/kernels/src/intersect.rs b/kernels/src/intersect.rs new file mode 100644 index 0000000..479604c --- /dev/null +++ b/kernels/src/intersect.rs @@ -0,0 +1,513 @@ +#[cfg(target_os = "cuda")] +pub mod device { + use shared::core::aggregates::{BVHAggregate, LinearBVHNode}; + use shared::core::geometry::{Bounds3f, Normal3f, Point2f, Point3f, Ray, Vector3f}; + use shared::core::interaction::LightSampleContext; + use shared::core::material::Material; + use shared::core::medium::MediumInterface; + use shared::core::primitive::{Primitive, PrimitiveTrait}; + use shared::spectra::{SampledSpectrum, SampledWavelengths}; + use shared::utils::atomic::GpuAtomicU32; + use shared::utils::soa::SoABuffer; + use shared::wavefront::workitems::*; + use shared::{Float, Ptr}; + + use cuda_std::*; + + #[repr(C)] + pub struct IntersectClosestParams { + pub bvh: Ptr<BVHAggregate>, + + // Input queue (read by index, one entry per thread) + pub ray_q: Ptr<RayQueue>, + + // Output queues (pushed to by the kernel) + pub escaped_ray_q: Ptr<EscapedRayQueue>, + pub hit_area_light_q: Ptr<HitAreaLightQueue>, + pub basic_eval_mtl_q: Ptr<MaterialEvalQueue>, + pub universal_eval_mtl_q: Ptr<MaterialEvalQueue>, + pub next_ray_q: Ptr<RayQueue>, + + // Persistent per-path state, indexed by pixel_index + pub pixel_sample_state: Ptr<PixelSampleState>, + + pub n_rays: u32, + } + + /// One 
thread per ray: traverse BVH, push results to output queues. + #[kernel] + pub unsafe fn intersect_closest(params: &IntersectClosestParams) { + let idx = thread::index_1d(); + if idx >= params.n_rays { + return; + } + let i = idx as usize; + + let ray_q = &*params.ray_q.as_raw(); + let work = ray_q.storage.get(i); + + let ray = Ray::new( + work.ray_o, + work.ray_d, + Some(work.ray_time), + work.ray_medium, + ); + + let pi = work.pixel_index as usize; + let pss = &*params.pixel_sample_state.as_raw(); + + // Read persistent path state + let beta = pss.beta.get(pi); + let r_u = pss.r_u.get(pi); + let r_l = pss.r_l.get(pi); + let lambda = pss.lambda.get(pi); + let depth = pss.depth.get(pi); + let specular_bounce = pss.specular_bounce.get(pi) != 0; + let prev_intr_ctx = pss.prev_intr_ctx.get(pi); + let eta_scale = pss.eta_scale.get(pi); + let any_non_specular = pss.any_non_specular_bounces.get(pi) != 0; + + // BVH traversal — mirrors BVHAggregate::intersect exactly + let bvh = &*params.bvh.as_raw(); + if bvh.nodes.is_empty() { + // No geometry — ray escapes + push_escaped(params, &work, &lambda, &beta, &r_u, &r_l, depth, specular_bounce, &prev_intr_ctx); + return; + } + + let nodes_ptr = bvh.nodes.as_ptr(); + let prims_ptr = bvh.primitives.as_ptr(); + let mut best_si = None; + let mut hit_t: Float = Float::INFINITY; + + let inv_dir = Vector3f::new( + 1.0 / ray.d.x(), + 1.0 / ray.d.y(), + 1.0 / ray.d.z(), + ); + let dir_is_neg = [ + if inv_dir.x() < 0.0 { 1u8 } else { 0 }, + if inv_dir.y() < 0.0 { 1u8 } else { 0 }, + if inv_dir.z() < 0.0 { 1u8 } else { 0 }, + ]; + + let mut to_visit_offset: u32 = 0; + let mut current_node_index: usize = 0; + // GPU stack — 64 entries matches CPU, fits in registers/local memory + let mut nodes_to_visit = [0usize; 64]; + + loop { + let node = &*nodes_ptr.add(current_node_index); + + if node.bounds.intersect_p(ray.o, hit_t, inv_dir, &dir_is_neg).is_some() { + if node.n_primitives > 0 { + // Leaf node — test primitives + let mut j = 0u16; + 
while j < node.n_primitives { + let prim_idx = node.primitives_offset + j as usize; + let prim = &*prims_ptr.add(prim_idx); + + if let Some(si) = prim.intersect(&ray, Some(hit_t)) { + hit_t = si.t_hit(); + best_si = Some(si); + } + j += 1; + } + + if to_visit_offset == 0 { + break; + } + to_visit_offset -= 1; + current_node_index = nodes_to_visit[to_visit_offset as usize]; + } else { + // Interior node — push far child, visit near child + if dir_is_neg[node.axis as usize] == 1 { + nodes_to_visit[to_visit_offset as usize] = current_node_index + 1; + to_visit_offset += 1; + current_node_index = node.primitives_offset; + } else { + nodes_to_visit[to_visit_offset as usize] = node.primitives_offset; + to_visit_offset += 1; + current_node_index += 1; + } + } + } else { + if to_visit_offset == 0 { + break; + } + to_visit_offset -= 1; + current_node_index = nodes_to_visit[to_visit_offset as usize]; + } + } + + // Sort result into output queues + let Some(si) = best_si else { + push_escaped(params, &work, &lambda, &beta, &r_u, &r_l, depth, specular_bounce, &prev_intr_ctx); + return; + }; + + let intr = &si.intr; + + // Null material — medium interface, re-queue ray + if intr.material.is_null() { + let next_q = &*params.next_ray_q.as_raw(); + next_q.push(RayWorkItem { + ray_o: intr.p(), + ray_d: work.ray_d, + ray_time: work.ray_time, + ray_medium: work.ray_medium, + has_differentials: work.has_differentials, + differential: work.differential, + pixel_index: work.pixel_index, + }); + return; + } + + // Area light hit + if !intr.area_light.is_null() { + let q = &*params.hit_area_light_q.as_raw(); + q.push(HitAreaLightWorkItem { + area_light: intr.area_light, + p: intr.p(), + n: intr.n(), + uv: intr.common.uv, + wo: -work.ray_d, + lambda, + pixel_index: work.pixel_index, + beta, + r_u, + r_l, + depth, + specular_bounce, + prev_intr_ctx, + }); + } + + // Material evaluation: push to appropriate queue + // For now, push everything to universal eval queue. 
+ // Basic vs universal split requires checking can_evaluate_textures + // on the material, which we can refine later. + let q = &*params.universal_eval_mtl_q.as_raw(); + q.push(MaterialEvalWorkItem { + p: intr.p(), + n: intr.n(), + ns: intr.shading.n, + dpdu: intr.shading.dpdu, + dpdv: intr.shading.dpdv, + uv: intr.common.uv, + wo: -work.ray_d, + time: work.ray_time, + face_index: intr.face_index, + material: intr.material, + area_light: intr.area_light, + medium_interface: intr.common.medium_interface, + pixel_index: work.pixel_index, + lambda, + beta, + r_u, + any_non_specular_bounces: any_non_specular, + depth, + eta_scale, + }); + } + + /// Shadow ray kernel — one thread per shadow ray, binary occlusion test. + #[kernel] + pub unsafe fn intersect_shadow(params: &IntersectShadowParams) { + let idx = thread::index_1d(); + if idx >= params.n_rays { + return; + } + let i = idx as usize; + + let shadow_q = &*params.shadow_ray_q.as_raw(); + let work = shadow_q.storage.get(i); + + let ray = Ray::new( + work.ray_o, + work.ray_d, + Some(work.ray_time), + Ptr::null(), + ); + + // BVH any-hit traversal + let bvh = &*params.bvh.as_raw(); + let occluded = bvh_intersect_p(bvh, &ray, work.t_max); + + // If NOT occluded, add direct lighting contribution + if !occluded { + let pss = &*params.pixel_sample_state.as_raw(); + let pi = work.pixel_index as usize; + // Plain (non-atomic) read-modify-write of the pixel's radiance + let mut l = pss.l.get(pi); + l += work.l_d; + pss.l.set(pi, l); + // NOTE: This set is not atomic per-channel. For correctness + // when multiple shadow rays hit the same pixel, we'd need + // per-channel AtomicFloat. For now this works because each + // pixel has at most one shadow ray in flight per depth. + } + } + + /// Launch parameters for shadow ray kernel. 
+ #[repr(C)] + pub struct IntersectShadowParams { + pub bvh: Ptr<BVHAggregate>, + pub shadow_ray_q: Ptr<ShadowRayQueue>, + pub pixel_sample_state: Ptr<PixelSampleState>, + pub n_rays: u32, + } + + // -- Helper functions -- + + unsafe fn push_escaped( + params: &IntersectClosestParams, + work: &RayWorkItem, + lambda: &SampledWavelengths, + beta: &SampledSpectrum, + r_u: &SampledSpectrum, + r_l: &SampledSpectrum, + depth: u32, + specular_bounce: bool, + prev_intr_ctx: &LightSampleContext, + ) { + let q = &*params.escaped_ray_q.as_raw(); + q.push(EscapedRayWorkItem { + ray_o: work.ray_o, + ray_d: work.ray_d, + lambda: *lambda, + pixel_index: work.pixel_index, + beta: *beta, + r_u: *r_u, + r_l: *r_l, + depth, + specular_bounce, + prev_intr_ctx: *prev_intr_ctx, + }); + } + + /// BVH any-hit traversal for shadow rays — returns true if occluded. + unsafe fn bvh_intersect_p(bvh: &BVHAggregate, ray: &Ray, t_max: Float) -> bool { + if bvh.nodes.is_empty() { + return false; + } + + let nodes_ptr = bvh.nodes.as_ptr(); + let prims_ptr = bvh.primitives.as_ptr(); + + let inv_dir = Vector3f::new( + 1.0 / ray.d.x(), + 1.0 / ray.d.y(), + 1.0 / ray.d.z(), + ); + let dir_is_neg = [ + if inv_dir.x() < 0.0 { 1u8 } else { 0 }, + if inv_dir.y() < 0.0 { 1u8 } else { 0 }, + if inv_dir.z() < 0.0 { 1u8 } else { 0 }, + ]; + + let mut to_visit_offset: u32 = 0; + let mut current_node_index: usize = 0; + let mut nodes_to_visit = [0usize; 64]; + + loop { + let node = &*nodes_ptr.add(current_node_index); + + if node.bounds.intersect_p(ray.o, t_max, inv_dir, &dir_is_neg).is_some() { + if node.n_primitives > 0 { + let mut j = 0u16; + while j < node.n_primitives { + let prim_idx = node.primitives_offset + j as usize; + let prim = &*prims_ptr.add(prim_idx); + if prim.intersect_p(ray, Some(t_max)) { + return true; + } + j += 1; + } + if to_visit_offset == 0 { + break; + } + to_visit_offset -= 1; + current_node_index = nodes_to_visit[to_visit_offset as usize]; + } else { + if dir_is_neg[node.axis as usize] == 1 { + nodes_to_visit[to_visit_offset as 
usize] = current_node_index + 1; + to_visit_offset += 1; + current_node_index = node.primitives_offset; + } else { + nodes_to_visit[to_visit_offset as usize] = node.primitives_offset; + to_visit_offset += 1; + current_node_index += 1; + } + } + } else { + if to_visit_offset == 0 { + break; + } + to_visit_offset -= 1; + current_node_index = nodes_to_visit[to_visit_offset as usize]; + } + } + false + } +} + + +#[cfg(feature = "cuda")] +pub mod host { + use crate::core::aggregates::BVHAggregate; + use crate::core::geometry::Bounds3f; + use crate::core::primitive::PrimitiveTrait; + use crate::wavefront::aggregate::WavefrontAggregate; + use crate::wavefront::workitems::*; + use crate::{Ptr, Float}; + + use cust::prelude::*; + use cust::launch; + + /// CUDA aggregate — holds the BVH and the compiled kernel module. + pub struct CudaAggregate { + pub bvh: Ptr<BVHAggregate>, + pub module: Module, + pub stream: Stream, + } + + impl CudaAggregate { + pub fn new(bvh: Ptr<BVHAggregate>, ptx_data: &str) -> cust::error::CudaResult<Self> { + // Load the PTX module and create a stream (assumes cust::init() already called) + let module = Module::from_ptx(ptx_data, &[])?; + let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; + Ok(Self { bvh, module, stream }) + } + + fn launch_intersect_closest( + &self, + n_rays: u32, + ray_q: &RayQueue, + escaped_ray_q: &EscapedRayQueue, + hit_area_light_q: &HitAreaLightQueue, + basic_eval_mtl_q: &MaterialEvalQueue, + universal_eval_mtl_q: &MaterialEvalQueue, + next_ray_q: &RayQueue, + pixel_sample_state: &PixelSampleState, + ) -> cust::error::CudaResult<()> { + if n_rays == 0 { + return Ok(()); + } + + let func = self.module.get_function("intersect_closest")?; + + // Build launch params. NOTE(review): `params` is a host-side local; confirm the kernel can dereference it + let params = super::device::IntersectClosestParams { + bvh: self.bvh, + ray_q: Ptr::from(ray_q), + escaped_ray_q: Ptr::from(escaped_ray_q), + hit_area_light_q: Ptr::from(hit_area_light_q), + basic_eval_mtl_q: Ptr::from(basic_eval_mtl_q), + universal_eval_mtl_q: 
Ptr::from(universal_eval_mtl_q), + next_ray_q: Ptr::from(next_ray_q), + pixel_sample_state: Ptr::from(pixel_sample_state), + n_rays, + }; + + let block_size = 256u32; + let grid_size = (n_rays + block_size - 1) / block_size; + + // `launch!` takes the stream as a bare identifier, so bind it locally. + let stream = &self.stream; + unsafe { + launch!( + func<<<grid_size, block_size, 0, stream>>>(&params) + )?; + } + self.stream.synchronize()?; + Ok(()) + } + + fn launch_intersect_shadow( + &self, + n_rays: u32, + shadow_ray_q: &ShadowRayQueue, + pixel_sample_state: &PixelSampleState, + ) -> cust::error::CudaResult<()> { + if n_rays == 0 { + return Ok(()); + } + + let func = self.module.get_function("intersect_shadow")?; + + let params = super::device::IntersectShadowParams { + bvh: self.bvh, + shadow_ray_q: Ptr::from(shadow_ray_q), + pixel_sample_state: Ptr::from(pixel_sample_state), + n_rays, + }; + + let block_size = 256u32; + let grid_size = (n_rays + block_size - 1) / block_size; + + // `launch!` takes the stream as a bare identifier, so bind it locally. + let stream = &self.stream; + unsafe { + launch!( + func<<<grid_size, block_size, 0, stream>>>(&params) + )?; + } + self.stream.synchronize()?; + Ok(()) + } + } + + impl WavefrontAggregate for CudaAggregate { + fn bounds(&self) -> Bounds3f { + self.bvh.get().map(|b| b.bounds()).unwrap_or_default() + } + + fn intersect_closest( + &self, + max_rays: usize, + ray_q: &RayQueue, + escaped_ray_q: &EscapedRayQueue, + hit_area_light_q: &HitAreaLightQueue, + basic_eval_mtl_q: &MaterialEvalQueue, + universal_eval_mtl_q: &MaterialEvalQueue, + next_ray_q: &RayQueue, + pixel_sample_state: &PixelSampleState, + ) { + let n_rays = ray_q.size().min(max_rays as u32); + self.launch_intersect_closest( + n_rays, + ray_q, + escaped_ray_q, + hit_area_light_q, + basic_eval_mtl_q, + universal_eval_mtl_q, + next_ray_q, + pixel_sample_state, + ) + .expect("CUDA intersect_closest kernel launch failed"); + } + + fn intersect_shadow( + &self, + max_rays: usize, + shadow_ray_q: &ShadowRayQueue, + pixel_sample_state: &PixelSampleState, + ) { + let n_rays = shadow_ray_q.size().min(max_rays as u32); + self.launch_intersect_shadow(n_rays, shadow_ray_q, pixel_sample_state) + .expect("CUDA intersect_shadow kernel 
launch failed"); + } + + fn intersect_shadow_tr( + &self, + max_rays: usize, + shadow_ray_q: &ShadowRayQueue, + pixel_sample_state: &PixelSampleState, + ) { + // Without participating media, shadow_tr is the same as shadow + self.intersect_shadow(max_rays, shadow_ray_q, pixel_sample_state); + } + } +} + diff --git a/shared/src/utils/atomic.rs b/shared/src/utils/atomic.rs new file mode 100644 index 0000000..3239e6c --- /dev/null +++ b/shared/src/utils/atomic.rs @@ -0,0 +1,263 @@ +use crate::Float; + +pub const SCOPE_DEVICE: u32 = 1; + +#[allow(dead_code)] +pub const SCOPE_WORKGROUP: u32 = 2; + +pub const SEMANTICS_RELAXED: u32 = 0x0; + +#[allow(dead_code)] +pub const SEMANTICS_ACQUIRE_RELEASE: u32 = 0x8; + +#[repr(C)] +#[derive(Debug)] +pub struct AtomicU32 { + value: u32, +} + +impl Default for AtomicU32 { + fn default() -> Self { + Self::new(0) + } +} + +impl Clone for AtomicU32 { + fn clone(&self) -> Self { + Self::new(self.load()) + } +} + +impl AtomicU32 { + pub fn new(val: u32) -> Self { + Self { value: val } + } + + #[cfg(not(any(target_arch = "spirv", feature = "cuda")))] + #[inline(always)] + pub fn load(&self) -> u32 { + let atomic = unsafe { + &*(core::ptr::addr_of!(self.value) as *const core::sync::atomic::AtomicU32) + }; + atomic.load(core::sync::atomic::Ordering::Relaxed) + } + + #[cfg(not(any(target_arch = "spirv", feature = "cuda")))] + #[inline(always)] + pub fn store(&self, val: u32) { + let atomic = unsafe { + &*(core::ptr::addr_of!(self.value) as *const core::sync::atomic::AtomicU32) + }; + atomic.store(val, core::sync::atomic::Ordering::Relaxed); + } + + #[cfg(not(any(target_arch = "spirv", feature = "cuda")))] + #[inline(always)] + pub fn fetch_add(&self, val: u32) -> u32 { + let atomic = unsafe { + &*(core::ptr::addr_of!(self.value) as *const core::sync::atomic::AtomicU32) + }; + atomic.fetch_add(val, core::sync::atomic::Ordering::Relaxed) + } + + #[cfg(not(any(target_arch = "spirv", feature = "cuda")))] + #[inline(always)] + pub fn 
compare_exchange(&self, expected: u32, desired: u32) -> Result<u32, u32> { + let atomic = unsafe { + &*(core::ptr::addr_of!(self.value) as *const core::sync::atomic::AtomicU32) + }; + atomic.compare_exchange( + expected, + desired, + core::sync::atomic::Ordering::Relaxed, + core::sync::atomic::Ordering::Relaxed, + ) + } + + #[cfg(target_arch = "spirv")] + #[inline(always)] + pub fn load(&self) -> u32 { + unsafe { + spirv_std::arch::atomic_load::<u32, SCOPE_DEVICE, SEMANTICS_RELAXED>( + &self.value, + ) + } + } + + #[cfg(target_arch = "spirv")] + #[inline(always)] + pub fn store(&self, val: u32) { + unsafe { + spirv_std::arch::atomic_store::<u32, SCOPE_DEVICE, SEMANTICS_RELAXED>( + &mut *core::ptr::addr_of!(self.value).cast_mut(), + val, + ); + } + } + + #[cfg(target_arch = "spirv")] + #[inline(always)] + pub fn fetch_add(&self, val: u32) -> u32 { + unsafe { + spirv_std::arch::atomic_i_add::<u32, SCOPE_DEVICE, SEMANTICS_RELAXED>( + &mut *core::ptr::addr_of!(self.value).cast_mut(), + val, + ) + } + } + + #[cfg(target_arch = "spirv")] + #[inline(always)] + pub fn compare_exchange(&self, expected: u32, desired: u32) -> Result<u32, u32> { + let old = unsafe { + spirv_std::arch::atomic_compare_exchange::< + u32, + SCOPE_DEVICE, + SEMANTICS_RELAXED, + SEMANTICS_RELAXED, + >( + &mut *core::ptr::addr_of!(self.value).cast_mut(), + desired, + expected, + ) + }; + if old == expected { + Ok(old) + } else { + Err(old) + } + } + + // -- CUDA backend -- + #[cfg(feature = "cuda")] + #[inline(always)] + pub fn load(&self) -> u32 { + // Volatile read. NOTE(review): not an atomic instruction; confirm this is sufficient + unsafe { core::ptr::read_volatile(&self.value) } + } + + #[cfg(feature = "cuda")] + #[inline(always)] + pub fn store(&self, val: u32) { + unsafe { + core::ptr::write_volatile( + core::ptr::addr_of!(self.value).cast_mut(), + val, + ); + } + } + + #[cfg(feature = "cuda")] + #[inline(always)] + pub fn fetch_add(&self, val: u32) -> u32 { + let ptr = core::ptr::addr_of!(self.value).cast_mut(); + let mut old: u32; + unsafe { + core::arch::asm!( + "atom.add.u32 {old}, [{ptr}], {val};", + old = out(reg32) old, + ptr = in(reg64) ptr, + 
val = in(reg32) val, + ); + } + old + } + + #[cfg(feature = "cuda")] + #[inline(always)] + pub fn compare_exchange(&self, expected: u32, desired: u32) -> Result<u32, u32> { + let ptr = core::ptr::addr_of!(self.value).cast_mut(); + let mut old: u32; + unsafe { + core::arch::asm!( + "atom.cas.b32 {old}, [{ptr}], {expected}, {desired};", + old = out(reg32) old, + ptr = in(reg64) ptr, + expected = in(reg32) expected, + desired = in(reg32) desired, + ); + } + if old == expected { + Ok(old) + } else { + Err(old) + } + } +} + +#[repr(C)] +#[derive(Debug)] +pub struct AtomicF32 { + bits: AtomicU32, +} + +impl Default for AtomicF32 { + fn default() -> Self { + Self::new(0.0) + } +} + +impl Clone for AtomicF32 { + fn clone(&self) -> Self { + Self::new(self.get()) + } +} + +impl AtomicF32 { + pub fn new(val: Float) -> Self { + Self { + bits: AtomicU32::new(val.to_bits()), + } + } + + pub fn get(&self) -> Float { + Float::from_bits(self.bits.load()) + } + + pub fn set(&self, val: Float) { + self.bits.store(val.to_bits()); + } + + #[cfg(not(any(target_arch = "spirv", feature = "cuda")))] + #[inline(always)] + pub fn add(&self, val: Float) { + let mut current_bits = self.bits.load(); + loop { + let current_val = Float::from_bits(current_bits); + let new_val = current_val + val; + let new_bits = new_val.to_bits(); + match self.bits.compare_exchange(current_bits, new_bits) { + Ok(_) => break, + Err(x) => current_bits = x, + } + } + } + + #[cfg(target_arch = "spirv")] + #[inline(always)] + pub fn add(&self, val: Float) { + unsafe { + let float_ptr = core::ptr::addr_of!(self.bits.value) as *mut Float; + spirv_std::arch::atomic_f_add::<Float, SCOPE_DEVICE, SEMANTICS_RELAXED>( + &mut *float_ptr, + val, + ); + } + } + + #[cfg(feature = "cuda")] + #[inline(always)] + pub fn add(&self, val: Float) { + let ptr = core::ptr::addr_of!(self.bits.value) as *mut Float; + unsafe { + core::arch::asm!( + "atom.add.f32 {old}, [{ptr}], {val};", + old = out(reg32) _, + ptr = in(reg64) ptr, + val = in(reg32) val.to_bits(), + ); + } + } +} + +pub type 
AtomicFloat = AtomicF32; diff --git a/shared/src/wavefront/aggregate.rs b/shared/src/wavefront/aggregate.rs new file mode 100644 index 0000000..ba9406b --- /dev/null +++ b/shared/src/wavefront/aggregate.rs @@ -0,0 +1,216 @@ +use crate::core::geometry::{Bounds3f, Ray, Vector3f}; +use crate::core::interaction::InteractionTrait; +use crate::core::material::MaterialTrait; +use crate::core::primitive::{Primitive, PrimitiveTrait}; +use crate::core::texture::{TextureEvaluator, UniversalTextureEvaluator}; +use crate::wavefront::workitems::*; + +pub trait WavefrontAggregate { + fn bounds(&self) -> Bounds3f; + + fn intersect_closest( + &self, + max_rays: usize, + ray_q: &RayQueue, + escaped_ray_q: &EscapedRayQueue, + hit_area_light_q: &HitAreaLightQueue, + basic_eval_mtl_q: &MaterialEvalQueue, + universal_eval_mtl_q: &MaterialEvalQueue, + next_ray_q: &RayQueue, + pixel_sample_state: &PixelSampleState, + ); + + fn intersect_shadow( + &self, + max_rays: usize, + shadow_ray_q: &ShadowRayQueue, + pixel_sample_state: &PixelSampleState, + ); + + fn intersect_shadow_tr( + &self, + max_rays: usize, + shadow_ray_q: &ShadowRayQueue, + pixel_sample_state: &PixelSampleState, + ); + + // fn intersect_one_random( + // &self, + // max_rays: usize, + // subsurface_scatte_q: &mut SubsurfaceScatterQueue, + // ) { + // todo!() + // } +} + +pub struct CpuAggregate { + pub aggregate: Primitive, +} + +impl CpuAggregate { + pub fn new(aggregate: Primitive) -> Self { + Self { aggregate } + } +} + +impl WavefrontAggregate for CpuAggregate { + fn bounds(&self) -> Bounds3f { + self.aggregate.bounds() + } + + fn intersect_closest( + &self, + max_rays: usize, + ray_q: &RayQueue, + escaped_ray_q: &EscapedRayQueue, + hit_area_light_q: &HitAreaLightQueue, + basic_eval_mtl_q: &MaterialEvalQueue, + universal_eval_mtl_q: &MaterialEvalQueue, + next_ray_q: &RayQueue, + pixel_sample_state: &PixelSampleState, + ) { + let n_rays = ray_q.size().min(max_rays as u32); + + for i in 0..n_rays as usize { + let work = 
unsafe { ray_q.get(i) }; + + let ray = Ray::new(work.ray_o, work.ray_d, Some(work.ray_time), work.ray_medium); + + // Read path state from PixelSampleState + let pi = work.pixel_index as usize; + let beta = pixel_sample_state.beta.get(pi); + let r_u = pixel_sample_state.r_u.get(pi); + let r_l = pixel_sample_state.r_l.get(pi); + let lambda = pixel_sample_state.lambda.get(pi); + let depth = pixel_sample_state.depth.get(pi); + let specular_bounce = pixel_sample_state.specular_bounce.get(pi) != 0; + let any_non_specular = pixel_sample_state.any_non_specular_bounces.get(pi) != 0; + let eta_scale = pixel_sample_state.eta_scale.get(pi); + let prev_intr_ctx = pixel_sample_state.prev_intr_ctx.get(pi); + + let Some(si) = self.aggregate.intersect(&ray, None) else { + // Ray escaped — push to escaped ray queue + escaped_ray_q.push(EscapedRayWorkItem { + ray_o: work.ray_o, + ray_d: work.ray_d, + lambda, + pixel_index: work.pixel_index, + beta, + r_u, + r_l, + depth, + specular_bounce, + prev_intr_ctx, + }); + continue; + }; + + let intr = &si.intr; + + // Check for null material (medium interface) — re-queue the ray + if intr.material.is_null() { + // Skip intersection and continue ray + // TODO: offset ray origin past the intersection + next_ray_q.push(RayWorkItem { + ray_o: intr.p(), + ray_d: work.ray_d, + ray_time: work.ray_time, + ray_medium: work.ray_medium, + has_differentials: work.has_differentials, + differential: work.differential, + pixel_index: work.pixel_index, + }); + continue; + } + + // Check for area light hit + if !intr.area_light.is_null() { + hit_area_light_q.push(HitAreaLightWorkItem { + area_light: intr.area_light, + p: intr.p(), + n: intr.n(), + uv: intr.common.uv, + wo: -work.ray_d, + lambda, + pixel_index: work.pixel_index, + beta, + r_u, + r_l, + depth, + specular_bounce, + prev_intr_ctx, + }); + } + + // Determine which material evaluation queue to use based on + // whether the material's textures can be evaluated with the + // basic evaluator 
(cheaper) or need the universal one. + let material = *intr.material.get().unwrap(); + let eval_q = if material.can_evaluate_textures(&UniversalTextureEvaluator) { + basic_eval_mtl_q + } else { + universal_eval_mtl_q + }; + + eval_q.push(MaterialEvalWorkItem { + p: intr.p(), + n: intr.n(), + ns: intr.shading.n, + dpdu: intr.shading.dpdu, + dpdv: intr.shading.dpdv, + uv: intr.common.uv, + wo: -work.ray_d, + time: work.ray_time, + face_index: intr.face_index, + material: intr.material, + area_light: intr.area_light, + medium_interface: intr.common.medium_interface, + pixel_index: work.pixel_index, + lambda, + beta, + r_u, + any_non_specular_bounces: any_non_specular, + depth, + eta_scale, + }); + } + } + + fn intersect_shadow( + &self, + max_rays: usize, + shadow_ray_q: &ShadowRayQueue, + pixel_sample_state: &PixelSampleState, + ) { + let n_rays = shadow_ray_q.size().min(max_rays as u32); + + for i in 0..n_rays as usize { + let work = unsafe { shadow_ray_q.get(i) }; + + let ray = Ray::new( + work.ray_o, + work.ray_d, + Some(work.ray_time), + crate::Ptr::null(), + ); + + // If the shadow ray is NOT occluded, add the direct lighting + // contribution to the pixel's accumulated radiance. 
+ if !self.aggregate.intersect_p(&ray, Some(work.t_max)) { + let pi = work.pixel_index as usize; + let mut l = pixel_sample_state.l.get(pi); + l += work.l_d; + pixel_sample_state.l.set(pi, l); + } + } + } + + fn intersect_shadow_tr( + &self, + max_rays: usize, + shadow_ray_q: &ShadowRayQueue, + pixel_sample_state: &PixelSampleState, + ) { + self.intersect_shadow(max_rays, shadow_ray_q, pixel_sample_state); + } +} diff --git a/shared/src/wavefront/integrator.rs b/shared/src/wavefront/integrator.rs new file mode 100644 index 0000000..d4274c5 --- /dev/null +++ b/shared/src/wavefront/integrator.rs @@ -0,0 +1,483 @@ +use crate::core::bxdf::FArgs; +use crate::core::bxdf::TransportMode; +use crate::core::camera::{Camera, CameraTrait}; +use crate::core::film::Film; +use crate::core::filter::{Filter, FilterTrait}; +use crate::core::geometry::{ + Bounds2i, Point2f, Point2i, Point3f, Point3fi, Ray, RayDifferential, Vector2f, Vector3f, + VectorLike, +}; +use crate::core::interaction::InteractionTrait; +use crate::core::light::{Light, LightSampleContext, LightTrait}; +use crate::core::material::{MaterialEvalContext, MaterialTrait}; +use crate::core::sampler::{CameraSample, Sampler, SamplerTrait}; +use crate::core::texture::{TextureEvalContext, UniversalTextureEvaluator}; +use crate::lights::sampler::{LightSampler, LightSamplerTrait}; +use crate::spectra::{SampledSpectrum, SampledWavelengths}; +use crate::utils::math::square; +use crate::utils::sampling::power_heuristic; +use crate::utils::soa::{SoA, SoAAllocator, WorkQueue}; +use crate::wavefront::aggregate::WavefrontAggregate; +use crate::wavefront::workitems::*; +use crate::{Float, GVec, Ptr}; + +pub struct WavefrontPathIntegrator<A: WavefrontAggregate> { + pub aggregate: A, + pub camera: Camera, + pub film: Film, + pub filter: Filter, + pub sampler: Sampler, + pub max_depth: u32, + pub samples_per_pixel: u32, + pub regularize: bool, + + // Lights + pub infinite_lights: GVec<Ptr<Light>>, + + // Queue capacity = resolution.x * scanlines_per_pass + pub 
max_queue_size: u32, + pub scanlines_per_pass: u32, + + pub ray_queues: [RayQueue; 2], + pub shadow_ray_queue: ShadowRayQueue, + pub escaped_ray_queue: EscapedRayQueue, + pub hit_area_light_queue: HitAreaLightQueue, + pub basic_eval_material_queue: MaterialEvalQueue, + pub universal_eval_material_queue: MaterialEvalQueue, + pub light_sampler: LightSampler, + + // Persistent per-path state + pub pixel_sample_state: PixelSampleState, +} + +impl<A: WavefrontAggregate> WavefrontPathIntegrator<A> { + pub fn render(&mut self) { + let pixel_bounds = self.film.pixel_bounds(); + let resolution = pixel_bounds.diagonal(); + + for sample_index in 0..self.samples_per_pixel { + // Process image in scanline batches + let mut y0 = pixel_bounds.p_min.y(); + while y0 < pixel_bounds.p_max.y() { + let y1 = (y0 + self.scanlines_per_pass as i32).min(pixel_bounds.p_max.y()); + + // Reset the primary ray queue for this set + self.ray_queues[0].reset(); + + self.generate_camera_rays(y0, y1, sample_index, &pixel_bounds); + + for depth in 0..=self.max_depth { + let current = (depth % 2) as usize; + let next = ((depth + 1) % 2) as usize; + + // Reset output queues before intersection + self.ray_queues[next].reset(); + self.escaped_ray_queue.reset(); + self.hit_area_light_queue.reset(); + self.basic_eval_material_queue.reset(); + self.universal_eval_material_queue.reset(); + self.shadow_ray_queue.reset(); + + // Skip if no rays to trace + if self.ray_queues[current].size() == 0 { + break; + } + + // Sorting of rays into output queues + self.aggregate.intersect_closest( + self.max_queue_size as usize, + &self.ray_queues[current], + &self.escaped_ray_queue, + &self.hit_area_light_queue, + &self.basic_eval_material_queue, + &self.universal_eval_material_queue, + &self.ray_queues[next], + &self.pixel_sample_state, + ); + + // Infinite light contributions + self.handle_escaped_rays(); + + // Area light contributions + self.handle_emissive_intersections(); + + // Last depth — don't evaluate materials or sample lights + if 
depth == self.max_depth { + break; + } + + // Evaluate materials, sample BSDFs, sample direct lighting + // This pushes to shadow_ray_queue and ray_queues[next] + self.evaluate_materials_and_bsdfs(depth); + + // Add direct lighting to pixels + self.aggregate.intersect_shadow( + self.max_queue_size as usize, + &self.shadow_ray_queue, + &self.pixel_sample_state, + ); + } + + // Update film from accumulated pixel sample state + self.update_film(y0, y1, &pixel_bounds); + + y0 = y1; + } + } + } + + /// Stage 1: Generate camera rays for scanlines [y0, y1). + fn generate_camera_rays( + &mut self, + y0: i32, + y1: i32, + sample_index: u32, + pixel_bounds: &Bounds2i, + ) { + // For each pixel in the scanline range, generate a camera ray + // and push it to the ray queue. Also initialize the PixelSampleState. + for y in y0..y1 { + for x in pixel_bounds.p_min.x()..pixel_bounds.p_max.x() { + let p_pixel = Point2i::new(x, y); + + // TODO: proper sampler state per pixel/sample + // For now, use a simple approach + self.sampler + .start_pixel_sample(p_pixel, sample_index as i32, Some(0)); + + let lambda = SampledWavelengths::sample_visible(self.sampler.get1d()); + + let camera_sample = crate::core::sampler::get_camera_sample( + &mut self.sampler, + p_pixel, + &self.filter, + ); + + let Some(camera_ray) = self.camera.generate_ray(camera_sample, &lambda) else { + continue; + }; + + // Compute pixel index for this sample + let pixel_index = self.ray_queues[0].size(); + + // Initialize persistent pixel state + let pi = pixel_index as usize; + self.pixel_sample_state.l.set(pi, SampledSpectrum::new(0.0)); + self.pixel_sample_state.beta.set(pi, camera_ray.weight); + self.pixel_sample_state.lambda.set(pi, lambda); + self.pixel_sample_state + .r_u + .set(pi, SampledSpectrum::new(1.0)); + self.pixel_sample_state + .r_l + .set(pi, SampledSpectrum::new(1.0)); + self.pixel_sample_state.depth.set(pi, 0); + self.pixel_sample_state.specular_bounce.set(pi, 1); + 
self.pixel_sample_state.any_non_specular_bounces.set(pi, 0); + self.pixel_sample_state.eta_scale.set(pi, 1.0); + self.pixel_sample_state.p_film.set(pi, camera_sample.p_film); + self.pixel_sample_state + .filter_weight + .set(pi, camera_sample.filter_weight); + self.pixel_sample_state + .prev_intr_ctx + .set(pi, LightSampleContext::default()); + + // Push ray to queue + self.ray_queues[0].push(RayWorkItem { + ray_o: camera_ray.ray.o, + ray_d: camera_ray.ray.d, + ray_time: camera_ray.ray.time, + ray_medium: camera_ray.ray.medium, + pixel_index: pixel_index, + has_differentials: true, + differential: RayDifferential::default(), + }); + } + } + } + + /// Handle escaped rays — evaluate infinite lights. + fn handle_escaped_rays(&self) { + let n = self.escaped_ray_queue.size(); + for i in 0..n as usize { + let w = unsafe { self.escaped_ray_queue.storage.get(i) }; + + let mut l_contrib = SampledSpectrum::new(0.0); + + // Evaluate all infinite lights + for light_ptr in &self.infinite_lights { + let light = light_ptr.get().unwrap(); + let ray = crate::core::geometry::Ray::new(w.ray_o, w.ray_d, None, Ptr::null()); + let le = light.le(&ray, &w.lambda); + if le.is_black() { + continue; + } + + if w.depth == 0 || w.specular_bounce { + // No MIS for direct camera rays or specular bounces + l_contrib += w.beta * le / w.r_u.average(); + } else { + // MIS with light sampling + // TODO: compute light PDF for MIS weight + // For now, use unidirectional weight only + l_contrib += w.beta * le / w.r_u.average(); + } + } + + if !l_contrib.is_black() { + let pi = w.pixel_index as usize; + let mut l = self.pixel_sample_state.l.get(pi); + l += l_contrib; + self.pixel_sample_state.l.set(pi, l); + } + } + } + + /// Handle emissive intersections — area light contribution with MIS. 
+ fn handle_emissive_intersections(&self) { + let n = self.hit_area_light_queue.size(); + for i in 0..n as usize { + let w = unsafe { self.hit_area_light_queue.storage.get(i) }; + + let light = w.area_light.get().unwrap(); + let le = light.l(w.p, w.n, w.uv, w.wo, &w.lambda); + if le.is_black() { + continue; + } + + let l_contrib = if w.depth == 0 || w.specular_bounce { + w.beta * le / w.r_u.average() + } else { + // MIS: combine BSDF and light sampling weights + // TODO: full MIS with light sampler PDF + w.beta * le / w.r_u.average() + }; + + if !l_contrib.is_black() { + let pi = w.pixel_index as usize; + let mut l = self.pixel_sample_state.l.get(pi); + l += l_contrib; + self.pixel_sample_state.l.set(pi, l); + } + } + } + + fn evaluate_materials_and_bsdfs(&mut self, depth: u32) { + self.evaluate_material_queue_impl(depth, false); + self.evaluate_material_queue_impl(depth, true); + } + + fn evaluate_material_queue_impl(&mut self, depth: u32, use_universal: bool) { + let queue = if use_universal { + &self.universal_eval_material_queue + } else { + &self.basic_eval_material_queue + }; + + let n = queue.size(); + let next = ((depth + 1) % 2) as usize; + + for i in 0..n as usize { + let w = unsafe { queue.storage.get(i) }; + let pi = w.pixel_index as usize; + + let lambda = self.pixel_sample_state.lambda.get(pi); + let beta = self.pixel_sample_state.beta.get(pi); + let any_non_specular = self.pixel_sample_state.any_non_specular_bounces.get(pi) != 0; + let eta_scale = self.pixel_sample_state.eta_scale.get(pi); + + let Some(material) = w.material.get() else { + continue; + }; + + let tex_eval = UniversalTextureEvaluator; + let ctx = MaterialEvalContext { + texture: TextureEvalContext { + p: w.p, + dpdx: Vector3f::zero(), + dpdy: Vector3f::zero(), + n: w.n, + uv: w.uv, + dudx: 0.0, + dudy: 0.0, + dvdx: 0.0, + dvdy: 0.0, + face_index: w.face_index, + }, + wo: w.wo, + ns: w.ns, + dpdus: w.dpdu, + }; + let mut bsdf = material.get_bsdf(&tex_eval, &ctx, &lambda); + + if 
bsdf.flags().is_empty() { + continue; + } + + if self.regularize && any_non_specular { + bsdf.regularize(); + } + + if depth >= self.max_depth { + continue; + } + + // Sample a light, compute contribution, + // push shadow ray with deferred visibility + if bsdf.flags().is_non_specular() { + let light_ctx = LightSampleContext { + pi: Point3fi::new_from_point(w.p), + n: w.n, + ns: w.ns, + }; + + if let Some(sampled_light) = self + .light_sampler + .sample_with_context(&light_ctx, self.sampler.get1d()) + { + if let Some(ls) = sampled_light.light.sample_li( + &light_ctx, + self.sampler.get2d(), + &lambda, + true, + ) { + if !ls.l.is_black() && ls.pdf > 0.0 { + let wi = ls.wi; + if let Some(f_val) = bsdf.f(w.wo, wi, TransportMode::Radiance) { + let f_cos = f_val * wi.abs_dot(w.ns.into()); + if !f_cos.is_black() { + let p_l = sampled_light.p * ls.pdf; + let l_d = if sampled_light.light.light_type().is_delta_light() { + beta * ls.l * f_cos / p_l + } else { + let p_b = bsdf.pdf(w.wo, wi, FArgs::default()); + let w_l = power_heuristic(1, p_l, 1, p_b); + beta * w_l * ls.l * f_cos / p_l + }; + + if !l_d.is_black() { + let ray_o = Ray::offset_origin( + &Point3fi::new_from_point(w.p), + &w.n, + &wi, + ); + let t_max = (1.0 - 1e-4) + * (Point3f::from(ls.p_light.p()) - ray_o).norm() + / wi.norm(); + + self.shadow_ray_queue.push(ShadowRayWorkItem { + ray_o, + ray_d: wi, + ray_time: w.time, + t_max, + lambda, + l_d, + pixel_index: w.pixel_index, + }); + } + } + } + } + } + } + } + + // Sample BSDF for next bounce + let wo = w.wo; + let Some(bs) = bsdf.sample_f( + wo, + self.sampler.get1d(), + self.sampler.get2d(), + FArgs::default(), + ) else { + continue; + }; + + let f_cos = bs.f * bs.wi.abs_dot(w.ns.into()); + if f_cos.is_black() || bs.pdf == 0.0 { + continue; + } + let new_beta = beta * f_cos / bs.pdf; + + let new_depth = depth + 1; + + // Russian roulette + if new_depth > 3 { + let rr_beta = new_beta.max_component_value(); + if rr_beta < 0.25 { + let q = (1.0 - 
rr_beta).max(0.0_f32); + if self.sampler.get1d() < q { + continue; + } + } + } + + let ray_o = Ray::offset_origin(&Point3fi::new_from_point(w.p), &w.n, &bs.wi); + + // Update PixelSampleState + self.pixel_sample_state.beta.set(pi, new_beta); + self.pixel_sample_state.depth.set(pi, new_depth); + self.pixel_sample_state + .specular_bounce + .set(pi, bs.is_specular() as u8); + self.pixel_sample_state + .any_non_specular_bounces + .set(pi, (any_non_specular || !bs.is_specular()) as u8); + self.pixel_sample_state.eta_scale.set( + pi, + if bs.is_transmissive() { + eta_scale * square(bs.eta) + } else { + eta_scale + }, + ); + self.pixel_sample_state.prev_intr_ctx.set( + pi, + LightSampleContext { + pi: Point3fi::new_from_point(w.p), + n: w.n, + ns: w.ns, + }, + ); + + // Push next bounce ray + self.ray_queues[next].push(RayWorkItem { + ray_o, + ray_d: bs.wi, + ray_time: w.time, + ray_medium: Ptr::null(), + pixel_index: w.pixel_index, + has_differentials: true, + differential: RayDifferential::default(), + }); + } + } + + /// Update film — write accumulated radiance to film pixels. + fn update_film(&self, y0: i32, y1: i32, pixel_bounds: &Bounds2i) { + // The pixel_sample_state indices map to rays generated in + // generate_camera_rays. We need to walk the same pixel order + // and read back the accumulated L values. 
+ let mut pi = 0usize; + for y in y0..y1 { + for x in pixel_bounds.p_min.x()..pixel_bounds.p_max.x() { + let l = self.pixel_sample_state.l.get(pi); + let lambda = self.pixel_sample_state.lambda.get(pi); + let filter_weight = self.pixel_sample_state.filter_weight.get(pi); + let p_film = self.pixel_sample_state.p_film.get(pi); + + // Add sample to film + self.film.add_sample( + Point2i::new(p_film.x() as i32, p_film.y() as i32), + l, + &lambda, + Some(&crate::core::film::VisibleSurface::default()), + filter_weight, + ); + + pi += 1; + } + } + } +} diff --git a/shared/src/wavefront/mod.rs b/shared/src/wavefront/mod.rs new file mode 100644 index 0000000..c1c76d6 --- /dev/null +++ b/shared/src/wavefront/mod.rs @@ -0,0 +1,7 @@ +pub mod workitems; +pub mod aggregate; +pub mod integrator; + +pub use workitems::*; +pub use aggregate::WavefrontAggregate; + diff --git a/shared/src/wavefront/workitems.rs b/shared/src/wavefront/workitems.rs new file mode 100644 index 0000000..2524dfc --- /dev/null +++ b/shared/src/wavefront/workitems.rs @@ -0,0 +1,507 @@ +use crate::core::bxdf::BxDFFlags; +use crate::core::geometry::{Normal3f, Point2f, Point3f, Point3fi, Vector3f, RayDifferential}; +use crate::core::light::LightSampleContext; +use crate::core::light::Light; +use crate::core::material::Material; +use crate::core::medium::{Medium, MediumInterface}; +use crate::spectra::{SampledSpectrum, SampledWavelengths}; +use crate::utils::soa::{alloc_soa_buffer, SoA, SoAAllocator, SoABuffer, WorkQueue}; +use crate::{Float, Ptr}; + +/// Per-path state that persists across all wavefront depth iterations. +/// Indexed by pixel_index. Allocated once with capacity = max_queue_size. 
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct PixelSampleState {
+    // Each field is one column; entry `i` of every column belongs to path `i`.
+    // The two flag columns hold 0 or 1: the integrator writes `bool as u8`
+    // and reads them back with `!= 0`.
+    pub filter_weight: SoABuffer,
+    pub p_film: SoABuffer,
+    pub l: SoABuffer,
+    pub lambda: SoABuffer,
+    pub r_u: SoABuffer,
+    pub r_l: SoABuffer,
+    pub prev_intr_ctx: SoABuffer,
+    pub beta: SoABuffer,
+    pub depth: SoABuffer,
+    pub specular_bounce: SoABuffer,
+    pub any_non_specular_bounces: SoABuffer,
+    pub eta_scale: SoABuffer,
+    pub camera_ray_weight: SoABuffer,
+    pub visible_surface_idx: SoABuffer,
+}
+
+/// `SoA` is implemented so the state can be allocated like the work-item
+/// layouts. There is no whole-item view: `Item` is `()` and `get`/`set` do
+/// nothing, so callers read and write the individual columns.
+impl SoA for PixelSampleState {
+    type Item = ();
+
+    /// Allocates every column with room for `n` entries from `alloc`.
+    fn allocate(n: u32, alloc: &dyn SoAAllocator) -> Self {
+        Self {
+            filter_weight: alloc_soa_buffer(n, alloc),
+            p_film: alloc_soa_buffer(n, alloc),
+            l: alloc_soa_buffer(n, alloc),
+            lambda: alloc_soa_buffer(n, alloc),
+            r_u: alloc_soa_buffer(n, alloc),
+            r_l: alloc_soa_buffer(n, alloc),
+            prev_intr_ctx: alloc_soa_buffer(n, alloc),
+            beta: alloc_soa_buffer(n, alloc),
+            depth: alloc_soa_buffer(n, alloc),
+            specular_bounce: alloc_soa_buffer(n, alloc),
+            any_non_specular_bounces: alloc_soa_buffer(n, alloc),
+            eta_scale: alloc_soa_buffer(n, alloc),
+            camera_ray_weight: alloc_soa_buffer(n, alloc),
+            visible_surface_idx: alloc_soa_buffer(n, alloc),
+        }
+    }
+
+    // Intentionally empty: there is nothing to gather or scatter for `()`.
+    unsafe fn get(&self, _i: usize) -> Self::Item {}
+    unsafe fn set(&self, _i: usize, _v: Self::Item) {}
+}
+
+/// A ray waiting to be intersected: origin, direction, time and medium, the
+/// `pixel_index` of the path it belongs to, and its ray differential.
+#[repr(C)]
+#[derive(Clone, Copy, Debug)]
+pub struct RayWorkItem {
+    pub ray_o: Point3f,
+    pub ray_d: Vector3f,
+    pub ray_time: Float,
+    pub ray_medium: Ptr,
+    pub pixel_index: u32,
+    pub has_differentials: bool,
+    pub differential: RayDifferential
+}
+
+/// Column layout of [`RayWorkItem`]: one buffer per field.
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct RayWorkItemSoA {
+    pub ray_o: SoABuffer,
+    pub ray_d: SoABuffer,
+    pub ray_time: SoABuffer,
+    pub ray_medium: SoABuffer>,
+    pub pixel_index: SoABuffer,
+    pub has_differentials: SoABuffer,
+    pub differential: SoABuffer,
+}
+
+impl SoA for RayWorkItemSoA {
+    type Item = RayWorkItem;
+
+    /// Allocates every column with room for `n` entries from `alloc`.
+    fn allocate(n: u32, alloc: &dyn SoAAllocator) -> Self {
+        Self {
+            ray_o: alloc_soa_buffer(n, alloc),
+            ray_d: alloc_soa_buffer(n, alloc),
+            ray_time: alloc_soa_buffer(n, alloc),
+            ray_medium: alloc_soa_buffer(n, alloc),
+            pixel_index: alloc_soa_buffer(n, alloc),
+            has_differentials: alloc_soa_buffer(n, alloc),
+            differential: alloc_soa_buffer(n, alloc),
+        }
+    }
+
+    /// Gathers entry `i` of every column into a [`RayWorkItem`].
+    ///
+    /// # Safety
+    /// Presumably `i` must be below the allocated capacity — confirm against
+    /// the `SoABuffer` contract.
+    unsafe fn get(&self, i: usize) -> RayWorkItem {
+        RayWorkItem {
+            ray_o: self.ray_o.get(i),
+            ray_d: self.ray_d.get(i),
+            ray_time: self.ray_time.get(i),
+            ray_medium: self.ray_medium.get(i),
+            pixel_index: self.pixel_index.get(i),
+            has_differentials: self.has_differentials.get(i),
+            differential: self.differential.get(i),
+        }
+    }
+
+    /// Scatters the fields of `v` into entry `i` of every column.
+    ///
+    /// # Safety
+    /// Presumably `i` must be below the allocated capacity — confirm against
+    /// the `SoABuffer` contract.
+    unsafe fn set(&self, i: usize, v: RayWorkItem) {
+        self.ray_o.set(i, v.ray_o);
+        self.ray_d.set(i, v.ray_d);
+        self.ray_time.set(i, v.ray_time);
+        self.ray_medium.set(i, v.ray_medium);
+        self.pixel_index.set(i, v.pixel_index);
+        self.has_differentials.set(i, v.has_differentials);
+        self.differential.set(i, v.differential);
+    }
+}
+
+/// A ray that left the scene without hitting anything, together with the
+/// path state needed to add the infinite lights' contribution.
+#[repr(C)]
+#[derive(Clone, Copy, Debug)]
+pub struct EscapedRayWorkItem {
+    pub ray_o: Point3f,
+    pub ray_d: Vector3f,
+    pub lambda: SampledWavelengths,
+    pub pixel_index: u32,
+    pub beta: SampledSpectrum,
+    pub r_u: SampledSpectrum,
+    pub r_l: SampledSpectrum,
+    pub depth: u32,
+    pub specular_bounce: bool,
+    pub prev_intr_ctx: LightSampleContext,
+}
+
+/// Column layout of [`EscapedRayWorkItem`]. `specular_bounce` is stored as
+/// 0/1 and converted to and from `bool` in `get`/`set`.
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct EscapedRayWorkItemSoA {
+    pub ray_o: SoABuffer,
+    pub ray_d: SoABuffer,
+    pub lambda: SoABuffer,
+    pub pixel_index: SoABuffer,
+    pub beta: SoABuffer,
+    pub r_u: SoABuffer,
+    pub r_l: SoABuffer,
+    pub depth: SoABuffer,
+    pub specular_bounce: SoABuffer,
+    pub prev_intr_ctx: SoABuffer,
+}
+
+impl SoA for EscapedRayWorkItemSoA {
+    type Item = EscapedRayWorkItem;
+
+    /// Allocates every column with room for `n` entries from `alloc`.
+    fn allocate(n: u32, alloc: &dyn SoAAllocator) -> Self {
+        Self {
+            ray_o: alloc_soa_buffer(n, alloc),
+            ray_d: alloc_soa_buffer(n, alloc),
+            lambda: alloc_soa_buffer(n, alloc),
+            pixel_index: alloc_soa_buffer(n, alloc),
+            beta: alloc_soa_buffer(n, alloc),
+            r_u: alloc_soa_buffer(n, alloc),
+            r_l: alloc_soa_buffer(n, alloc),
+            depth: alloc_soa_buffer(n, alloc),
+            specular_bounce: alloc_soa_buffer(n, alloc),
+            prev_intr_ctx: alloc_soa_buffer(n, alloc),
+        }
+    }
+
+    /// Gathers entry `i` of every column into an [`EscapedRayWorkItem`].
+    ///
+    /// # Safety
+    /// Presumably `i` must be below the allocated capacity — confirm against
+    /// the `SoABuffer` contract.
+    unsafe fn get(&self, i: usize) -> EscapedRayWorkItem {
+        EscapedRayWorkItem {
+            ray_o: self.ray_o.get(i),
+            ray_d: self.ray_d.get(i),
+            lambda: self.lambda.get(i),
+            pixel_index: self.pixel_index.get(i),
+            beta: self.beta.get(i),
+            r_u: self.r_u.get(i),
+            r_l: self.r_l.get(i),
+            depth: self.depth.get(i),
+            // Any non-zero stored value reads back as `true`.
+            specular_bounce: self.specular_bounce.get(i) != 0,
+            prev_intr_ctx: self.prev_intr_ctx.get(i),
+        }
+    }
+
+    /// Scatters the fields of `v` into entry `i` of every column.
+    ///
+    /// # Safety
+    /// Presumably `i` must be below the allocated capacity — confirm against
+    /// the `SoABuffer` contract.
+    unsafe fn set(&self, i: usize, v: EscapedRayWorkItem) {
+        self.ray_o.set(i, v.ray_o);
+        self.ray_d.set(i, v.ray_d);
+        self.lambda.set(i, v.lambda);
+        self.pixel_index.set(i, v.pixel_index);
+        self.beta.set(i, v.beta);
+        self.r_u.set(i, v.r_u);
+        self.r_l.set(i, v.r_l);
+        self.depth.set(i, v.depth);
+        self.specular_bounce.set(i, v.specular_bounce as u8);
+        self.prev_intr_ctx.set(i, v.prev_intr_ctx);
+    }
+}
+
+/// A ray that hit an emissive surface: the light, the hit point data needed
+/// to evaluate its emission, and the path state for accumulating it.
+#[repr(C)]
+#[derive(Clone, Copy, Debug)]
+pub struct HitAreaLightWorkItem {
+    pub area_light: Ptr,
+    pub p: Point3f,
+    pub n: Normal3f,
+    pub uv: Point2f,
+    pub wo: Vector3f,
+    pub lambda: SampledWavelengths,
+    pub pixel_index: u32,
+    pub beta: SampledSpectrum,
+    pub r_u: SampledSpectrum,
+    pub r_l: SampledSpectrum,
+    pub depth: u32,
+    pub specular_bounce: bool,
+    pub prev_intr_ctx: LightSampleContext,
+}
+
+/// Column layout of [`HitAreaLightWorkItem`]. `specular_bounce` is stored as
+/// 0/1 and converted to and from `bool` in `get`/`set`.
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct HitAreaLightWorkItemSoA {
+    pub area_light: SoABuffer>,
+    pub p: SoABuffer,
+    pub n: SoABuffer,
+    pub uv: SoABuffer,
+    pub wo: SoABuffer,
+    pub lambda: SoABuffer,
+    pub pixel_index: SoABuffer,
+    pub beta: SoABuffer,
+    pub r_u: SoABuffer,
+    pub r_l: SoABuffer,
+    pub depth: SoABuffer,
+    pub specular_bounce: SoABuffer,
+    pub prev_intr_ctx: SoABuffer,
+}
+
+impl SoA for HitAreaLightWorkItemSoA {
+    type Item = HitAreaLightWorkItem;
+
+    /// Allocates every column with room for `n` entries from `alloc`.
+    fn allocate(n: u32, alloc: &dyn SoAAllocator) -> Self {
+        Self {
+            area_light: alloc_soa_buffer(n, alloc),
+            p: alloc_soa_buffer(n, alloc),
+            n: alloc_soa_buffer(n, alloc),
+            uv: alloc_soa_buffer(n, alloc),
+            wo: alloc_soa_buffer(n, alloc),
+            lambda: alloc_soa_buffer(n, alloc),
+            pixel_index: alloc_soa_buffer(n, alloc),
+            beta: alloc_soa_buffer(n, alloc),
+            r_u: alloc_soa_buffer(n, alloc),
+            r_l: alloc_soa_buffer(n, alloc),
+            depth: alloc_soa_buffer(n, alloc),
+            specular_bounce: alloc_soa_buffer(n, alloc),
+            prev_intr_ctx: alloc_soa_buffer(n, alloc),
+        }
+    }
+
+    /// Gathers entry `i` of every column into a [`HitAreaLightWorkItem`].
+    ///
+    /// # Safety
+    /// Presumably `i` must be below the allocated capacity — confirm against
+    /// the `SoABuffer` contract.
+    unsafe fn get(&self, i: usize) -> HitAreaLightWorkItem {
+        HitAreaLightWorkItem {
+            area_light: self.area_light.get(i),
+            p: self.p.get(i),
+            n: self.n.get(i),
+            uv: self.uv.get(i),
+            wo: self.wo.get(i),
+            lambda: self.lambda.get(i),
+            pixel_index: self.pixel_index.get(i),
+            beta: self.beta.get(i),
+            r_u: self.r_u.get(i),
+            r_l: self.r_l.get(i),
+            depth: self.depth.get(i),
+            // Any non-zero stored value reads back as `true`.
+            specular_bounce: self.specular_bounce.get(i) != 0,
+            prev_intr_ctx: self.prev_intr_ctx.get(i),
+        }
+    }
+
+    /// Scatters the fields of `v` into entry `i` of every column.
+    ///
+    /// # Safety
+    /// Presumably `i` must be below the allocated capacity — confirm against
+    /// the `SoABuffer` contract.
+    unsafe fn set(&self, i: usize, v: HitAreaLightWorkItem) {
+        self.area_light.set(i, v.area_light);
+        self.p.set(i, v.p);
+        self.n.set(i, v.n);
+        self.uv.set(i, v.uv);
+        self.wo.set(i, v.wo);
+        self.lambda.set(i, v.lambda);
+        self.pixel_index.set(i, v.pixel_index);
+        self.beta.set(i, v.beta);
+        self.r_u.set(i, v.r_u);
+        self.r_l.set(i, v.r_l);
+        self.depth.set(i, v.depth);
+        self.specular_bounce.set(i, v.specular_bounce as u8);
+        self.prev_intr_ctx.set(i, v.prev_intr_ctx);
+    }
+}
+
+/// A surface hit waiting for material evaluation: local geometry, the
+/// material and optional area light, the medium interface, and path state.
+#[repr(C)]
+#[derive(Clone, Copy, Debug)]
+pub struct MaterialEvalWorkItem {
+    // Surface interaction
+    pub p: Point3f,
+    pub n: Normal3f,
+    pub ns: Normal3f,
+    pub dpdu: Vector3f,
+    pub dpdv: Vector3f,
+    pub uv: Point2f,
+    pub wo: Vector3f,
+    pub time: Float,
+    pub face_index: i32,
+
+    // Material
+    pub material: Ptr,
+    pub area_light: Ptr,
+
+    // Medium interface
+    pub medium_interface: MediumInterface,
+
+    // Path state
+    pub pixel_index: u32,
+    pub lambda: SampledWavelengths,
+    pub beta: SampledSpectrum,
+    pub r_u: SampledSpectrum,
+
+    // For next-event estimation
+    pub any_non_specular_bounces: bool,
+    pub depth: u32,
+    pub eta_scale: Float,
+}
+
+/// Column layout of [`MaterialEvalWorkItem`]. `any_non_specular_bounces` is
+/// stored as 0/1 and converted to and from `bool` in `get`/`set`.
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct MaterialEvalWorkItemSoA {
+    pub p: SoABuffer,
+    pub n: SoABuffer,
+    pub ns: SoABuffer,
+    pub dpdu: SoABuffer,
+    pub dpdv: SoABuffer,
+    pub uv: SoABuffer,
+    pub wo: SoABuffer,
+    pub time: SoABuffer,
+    pub face_index: SoABuffer,
+    pub material: SoABuffer>,
+    pub area_light: SoABuffer>,
+    pub medium_interface: SoABuffer,
+    pub pixel_index: SoABuffer,
+    pub lambda: SoABuffer,
+    pub beta: SoABuffer,
+    pub r_u: SoABuffer,
+    pub any_non_specular_bounces: SoABuffer,
+    pub depth: SoABuffer,
+    pub eta_scale: SoABuffer,
+}
+
+impl SoA for MaterialEvalWorkItemSoA {
+    type Item = MaterialEvalWorkItem;
+
+    /// Allocates every column with room for `n` entries from `alloc`.
+    fn allocate(n: u32, alloc: &dyn SoAAllocator) -> Self {
+        Self {
+            p: alloc_soa_buffer(n, alloc),
+            n: alloc_soa_buffer(n, alloc),
+            ns: alloc_soa_buffer(n, alloc),
+            dpdu: alloc_soa_buffer(n, alloc),
+            dpdv: alloc_soa_buffer(n, alloc),
+            uv: alloc_soa_buffer(n, alloc),
+            wo: alloc_soa_buffer(n, alloc),
+            time: alloc_soa_buffer(n, alloc),
+            face_index: alloc_soa_buffer(n, alloc),
+            material: alloc_soa_buffer(n, alloc),
+            area_light: alloc_soa_buffer(n, alloc),
+            medium_interface: alloc_soa_buffer(n, alloc),
+            pixel_index: alloc_soa_buffer(n, alloc),
+            lambda: alloc_soa_buffer(n, alloc),
+            beta: alloc_soa_buffer(n, alloc),
+            r_u: alloc_soa_buffer(n, alloc),
+            any_non_specular_bounces: alloc_soa_buffer(n, alloc),
+            depth: alloc_soa_buffer(n, alloc),
+            eta_scale: alloc_soa_buffer(n, alloc),
+        }
+    }
+
+    /// Gathers entry `i` of every column into a [`MaterialEvalWorkItem`].
+    ///
+    /// # Safety
+    /// Presumably `i` must be below the allocated capacity — confirm against
+    /// the `SoABuffer` contract.
+    unsafe fn get(&self, i: usize) -> MaterialEvalWorkItem {
+        MaterialEvalWorkItem {
+            p: self.p.get(i),
+            n: self.n.get(i),
+            ns: self.ns.get(i),
+            dpdu: self.dpdu.get(i),
+            dpdv: self.dpdv.get(i),
+            uv: self.uv.get(i),
+            wo: self.wo.get(i),
+            time: self.time.get(i),
+            face_index: self.face_index.get(i),
+            material: self.material.get(i),
+            area_light: self.area_light.get(i),
+            medium_interface: self.medium_interface.get(i),
+            pixel_index: self.pixel_index.get(i),
+            lambda: self.lambda.get(i),
+            beta: self.beta.get(i),
+            r_u: self.r_u.get(i),
+            // Any non-zero stored value reads back as `true`.
+            any_non_specular_bounces: self.any_non_specular_bounces.get(i) != 0,
+            depth: self.depth.get(i),
+            eta_scale: self.eta_scale.get(i),
+        }
+    }
+
+    /// Scatters the fields of `v` into entry `i` of every column.
+    ///
+    /// # Safety
+    /// Presumably `i` must be below the allocated capacity — confirm against
+    /// the `SoABuffer` contract.
+    unsafe fn set(&self, i: usize, v: MaterialEvalWorkItem) {
+        self.p.set(i, v.p);
+        self.n.set(i, v.n);
+        self.ns.set(i, v.ns);
+        self.dpdu.set(i, v.dpdu);
+        self.dpdv.set(i, v.dpdv);
+        self.uv.set(i, v.uv);
+        self.wo.set(i, v.wo);
+        self.time.set(i, v.time);
+        self.face_index.set(i, v.face_index);
+        self.material.set(i, v.material);
+        self.area_light.set(i, v.area_light);
+        self.medium_interface.set(i, v.medium_interface);
+        self.pixel_index.set(i, v.pixel_index);
+        self.lambda.set(i, v.lambda);
+        self.beta.set(i, v.beta);
+        self.r_u.set(i, v.r_u);
+        self.any_non_specular_bounces
+            .set(i, v.any_non_specular_bounces as u8);
+        self.depth.set(i, v.depth);
+        self.eta_scale.set(i, v.eta_scale);
+    }
+}
+
+/// A shadow ray whose visibility is still to be tested. `l_d` is the
+/// direct-lighting estimate computed when the ray was created; `t_max`
+/// bounds the ray parameter.
+#[repr(C)]
+#[derive(Clone, Copy, Debug)]
+pub struct ShadowRayWorkItem {
+    pub ray_o: Point3f,
+    pub ray_d: Vector3f,
+    pub ray_time: Float,
+    pub t_max: Float,
+    pub lambda: SampledWavelengths,
+    pub l_d: SampledSpectrum,
+    pub pixel_index: u32,
+}
+
+/// Column layout of [`ShadowRayWorkItem`]: one buffer per field.
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct ShadowRayWorkItemSoA {
+    pub ray_o: SoABuffer,
+    pub ray_d: SoABuffer,
+    pub ray_time: SoABuffer,
+    pub t_max: SoABuffer,
+    pub lambda: SoABuffer,
+    pub l_d: SoABuffer,
+    pub pixel_index: SoABuffer,
+}
+
+impl SoA for ShadowRayWorkItemSoA {
+    type Item = ShadowRayWorkItem;
+
+    /// Allocates every column with room for `n` entries from `alloc`.
+    fn allocate(n: u32, alloc: &dyn SoAAllocator) -> Self {
+        Self {
+            ray_o: alloc_soa_buffer(n, alloc),
+            ray_d: alloc_soa_buffer(n, alloc),
+            ray_time: alloc_soa_buffer(n, alloc),
+            t_max: alloc_soa_buffer(n, alloc),
+            lambda: alloc_soa_buffer(n, alloc),
+            l_d: alloc_soa_buffer(n, alloc),
+            pixel_index: alloc_soa_buffer(n, alloc),
+        }
+    }
+
+    /// Gathers entry `i` of every column into a [`ShadowRayWorkItem`].
+    ///
+    /// # Safety
+    /// Presumably `i` must be below the allocated capacity — confirm against
+    /// the `SoABuffer` contract.
+    unsafe fn get(&self, i: usize) -> ShadowRayWorkItem {
+        ShadowRayWorkItem {
+            ray_o: self.ray_o.get(i),
+            ray_d: self.ray_d.get(i),
+            ray_time: self.ray_time.get(i),
+            t_max: self.t_max.get(i),
+            lambda: self.lambda.get(i),
+            l_d: self.l_d.get(i),
+            pixel_index: self.pixel_index.get(i),
+        }
+    }
+
+    /// Scatters the fields of `v` into entry `i` of every column.
+    ///
+    /// # Safety
+    /// Presumably `i` must be below the allocated capacity — confirm against
+    /// the `SoABuffer` contract.
+    unsafe fn set(&self, i: usize, v: ShadowRayWorkItem) {
+        self.ray_o.set(i, v.ray_o);
+        self.ray_d.set(i, v.ray_d);
+        self.ray_time.set(i, v.ray_time);
+        self.t_max.set(i, v.t_max);
+        self.lambda.set(i, v.lambda);
+        self.l_d.set(i, v.l_d);
+        self.pixel_index.set(i, v.pixel_index);
+    }
+}
+
+/// A ray segment inside a medium together with the full path state.
+/// No column layout or queue alias for this item is defined in the part of
+/// the file shown here.
+#[repr(C)]
+#[derive(Clone, Copy, Debug)]
+pub struct MediumSampleWorkItem {
+    pub ray_o: Point3f,
+    pub ray_d: Vector3f,
+    pub ray_time: Float,
+    pub t_max: Float,
+    pub lambda: SampledWavelengths,
+    pub beta: SampledSpectrum,
+    pub r_u: SampledSpectrum,
+    pub r_l: SampledSpectrum,
+    pub medium: Ptr,
+    pub pixel_index: u32,
+    pub depth: u32,
+    pub specular_bounce: bool,
+    pub any_non_specular_bounces: bool,
+    pub eta_scale: Float,
+    pub prev_intr_ctx: LightSampleContext,
+}
+
+// Work queues used by the wavefront integrator, one alias per stage.
+pub type RayQueue = WorkQueue;
+pub type EscapedRayQueue = WorkQueue;
+pub type HitAreaLightQueue = WorkQueue;
+pub type MaterialEvalQueue = WorkQueue;
+pub type ShadowRayQueue = WorkQueue;