From e6d1850785b6e4ed326201076b79c19e6e1b5f42 Mon Sep 17 00:00:00 2001 From: Wito Wiala Date: Thu, 28 May 2026 06:39:05 +0100 Subject: [PATCH] Continuing cleanup --- shared/src/core/geometry/ray.rs | 55 +------------- shared/src/core/material.rs | 1 + shared/src/core/primitive.rs | 3 - shared/src/lib.rs | 5 +- shared/src/materials/coated.rs | 2 +- shared/src/utils/math.rs | 2 +- shared/src/utils/mod.rs | 63 +--------------- shared/src/utils/soa.rs | 125 +++++++++++++++++++++++++++++--- src/lib.rs | 2 +- src/utils/arena.rs | 12 +++ src/wavefront/aggregate.rs | 11 --- src/wavefront/integrator.rs | 76 +++++++++---------- src/wavefront/mod.rs | 3 - 13 files changed, 177 insertions(+), 183 deletions(-) delete mode 100644 src/wavefront/aggregate.rs diff --git a/shared/src/core/geometry/ray.rs b/shared/src/core/geometry/ray.rs index d6f801a..4ac67a9 100644 --- a/shared/src/core/geometry/ray.rs +++ b/shared/src/core/geometry/ray.rs @@ -1,7 +1,7 @@ use super::{Normal3f, Point3f, Point3fi, Vector3f, VectorLike}; use crate::core::medium::Medium; use crate::utils::math::{next_float_down, next_float_up}; -use crate::{gvec_with_capacity, Float, GVec, Ptr, SOA}; +use crate::{gvec_with_capacity, Float, GVec, Ptr}; #[repr(C)] #[derive(Clone, Copy, Debug)] @@ -123,56 +123,3 @@ pub struct RayDifferential { pub rx_direction: Vector3f, pub ry_direction: Vector3f, } - -#[derive(Clone)] -pub struct RaySoA { - pub o: GVec, - pub d: GVec, - pub time: GVec, - pub medium: GVec>, - pub has_differentials: GVec, - pub differential: GVec, -} - -impl SoA for RaySoA { - type Item = Ray; - - fn with_capacity(n: usize) -> Self { - Self { - o: gvec_with_capacity(n), - d: gvec_with_capacity(n), - time: gvec_with_capacity(n), - medium: gvec_with_capacity(n), - has_differentials: gvec_with_capacity(n), - differential: gvec_with_capacity(n), - } - } - - fn len(&self) -> usize { - self.o.len() - } - - unsafe fn get_unchecked(&self, i: usize) -> Ray { - Ray { - o: *self.o.get_unchecked(i), - d: *self.d.get_unchecked(i), - time: *self.time.get_unchecked(i), - medium: *self.medium.get_unchecked(i), - has_differentials: *self.has_differentials.get_unchecked(i), - differential: *self.differential.get_unchecked(i), - } - } - - unsafe fn set_unchecked(&mut self, i: usize, v: Ray) { - *self.o.get_unchecked_mut(i) = v.o; - *self.d.get_unchecked_mut(i) = v.d; - *self.time.get_unchecked_mut(i) = v.time; - *self.medium.get_unchecked_mut(i) = v.medium; - *self.has_differentials.get_unchecked_mut(i) = v.has_differentials; - *self.differential.get_unchecked_mut(i) = v.differential; - } -} - -impl SoAElement for Ray { - type SoA = RaySoA; -} diff --git a/shared/src/core/material.rs b/shared/src/core/material.rs index 49c50df..916cb4d 100644 --- a/shared/src/core/material.rs +++ b/shared/src/core/material.rs @@ -178,6 +178,7 @@ pub trait MaterialTrait { fn has_subsurface_scattering(&self) -> bool; } +#[repr(C)] #[derive(Clone, Copy, Debug)] #[enum_dispatch(MaterialTrait)] pub enum Material { diff --git a/shared/src/core/primitive.rs b/shared/src/core/primitive.rs index b81607b..3213313 100644 --- a/shared/src/core/primitive.rs +++ b/shared/src/core/primitive.rs @@ -68,9 +68,6 @@ impl PrimitiveTrait for GeometricPrimitive { } } - if r.medium.is_null() { - return None; - } si.set_intersection_properties( self.material, self.area_light, diff --git a/shared/src/lib.rs b/shared/src/lib.rs index 79144ba..db60950 100644 --- a/shared/src/lib.rs +++ b/shared/src/lib.rs @@ -20,5 +20,6 @@ pub mod wavefront; pub use core::pbrt::*; pub use utils::alloc::{gbox, gvec, gvec_from_slice, gvec_with_capacity, leak, GBox, GVec}; -pub use utils::{Array2D, PBRTOptions, Ptr, Transform, SOA}; -pub use wavefront::{WavefrontAggregate, WorkQueue}; +pub use utils::{Array2D, PBRTOptions, Ptr, Transform}; +pub use utils::soa::WorkQueue; +pub use wavefront::{WavefrontAggregate}; diff --git a/shared/src/materials/coated.rs b/shared/src/materials/coated.rs index 6274b80..36197d8 100644 --- a/shared/src/materials/coated.rs +++ b/shared/src/materials/coated.rs @@ -334,7 +334,7 @@ impl MaterialTrait for CoatedConductorMaterial { } fn get_normal_map(&self) -> Option<&Image> { - Some(&*self.normal_map) + self.normal_map.get() } fn get_displacement(&self) -> Ptr { diff --git a/shared/src/utils/math.rs b/shared/src/utils/math.rs index 400cbc8..53ab582 100644 --- a/shared/src/utils/math.rs +++ b/shared/src/utils/math.rs @@ -544,7 +544,7 @@ pub fn next_float_up(v: Float) -> Float { if v >= 0.0 { ui = ui.wrapping_add(1); } else { - ui = ui.wrapping_sub(1); + ui.wrapping_sub(1); } bits_to_float(ui) } diff --git a/shared/src/utils/mod.rs b/shared/src/utils/mod.rs index 07d278d..165e2d2 100644 --- a/shared/src/utils/mod.rs +++ b/shared/src/utils/mod.rs @@ -1,4 +1,5 @@ pub mod alloc; +pub mod atomic; pub mod complex; pub mod containers; pub mod hash; @@ -10,18 +11,18 @@ pub mod ptr; pub mod quaternion; pub mod rng; pub mod sampling; +pub mod soa; pub mod sobol; pub mod splines; pub mod transform; +pub use atomic::{AtomicFloat, AtomicU32}; +pub use containers::Array2D; pub use options::PBRTOptions; pub use ptr::Ptr; pub use transform::{AnimatedTransform, Transform, TransformGeneric}; -pub use containers::Array2D; -pub use soa::SOA: use crate::Float; -use core::sync::atomic::{AtomicU32, Ordering}; #[inline] pub fn find_interval(sz: u32, pred: F) -> u32 @@ -62,61 +63,6 @@ where i } -#[repr(C)] -#[derive(Debug)] -pub struct AtomicFloat { - bits: AtomicU32, -} - -impl Default for AtomicFloat { - fn default() -> Self { - Self::new(0.) - } -} - -impl Clone for AtomicFloat { - fn clone(&self) -> Self { - Self::new(self.get()) - } -} - -impl AtomicFloat { - pub fn new(val: f32) -> Self { - Self { - bits: AtomicU32::new(val.to_bits()), - } - } - - pub fn get(&self) -> Float { - Float::from_bits(self.bits.load(Ordering::Relaxed)) - } - - pub fn set(&self, val: Float) { - self.bits.store(val.to_bits(), Ordering::Relaxed); - } - - /// Atomically adds `val` to the current value. - /// Compare-And-Swap loop. - pub fn add(&self, val: f32) { - let mut current_bits = self.bits.load(Ordering::Relaxed); - loop { - let current_val = f32::from_bits(current_bits); - let new_val = current_val + val; - let new_bits = new_val.to_bits(); - - match self.bits.compare_exchange_weak( - current_bits, - new_bits, - Ordering::Relaxed, - Ordering::Relaxed, - ) { - Ok(_) => break, - Err(x) => current_bits = x, - } - } - } -} - #[inline(always)] pub fn gpu_array_from_fn(mut f: impl FnMut(usize) -> T) -> [T; N] { unsafe { @@ -130,4 +76,3 @@ pub fn gpu_array_from_fn(mut f: impl FnMut(usize) -> T) -> [T arr.assume_init() } } - diff --git a/shared/src/utils/soa.rs b/shared/src/utils/soa.rs index 10f0406..c07f813 100644 --- a/shared/src/utils/soa.rs +++ b/shared/src/utils/soa.rs @@ -1,15 +1,120 @@ -pub trait SoA: Clone { - type Item: Copy; - fn with_capacity(n: usize) -> Self; - fn len(&self) -> usize; - fn is_empty(&self) -> bool { - self.len() == 0 +use crate::utils::AtomicU32; +use crate::{Float, Ptr}; + +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct SoABuffer { + pub ptr: Ptr, + pub capacity: u32, +} + +impl SoABuffer { + pub fn null() -> Self { + Self { + ptr: Ptr::null(), + capacity: 0, + } } - unsafe fn get_unchecked(&self, i: usize) -> Self::Item; - unsafe fn set_unchecked(&mut self, i: usize, v: Self::Item); + #[inline(always)] + pub fn get(&self, i: usize) -> T { + debug_assert!(i < self.capacity as usize); + unsafe { *self.ptr.as_raw().add(i) } + } + + #[inline(always)] + pub fn set(&self, i: usize, val: T) { + debug_assert!(i < self.capacity as usize); + unsafe { + let p = self.ptr.as_raw() as *mut T; + *p.add(i) = val; + } + } } -pub trait SoAElement: Copy { - type SoA: SoA; +pub trait SoA { + type Item: Copy; + + fn allocate(n: u32, alloc: &dyn SoAAllocator) -> Self; + + /// Read one element by scattering across all buffers. + unsafe fn get(&self, i: usize) -> Self::Item; + + /// Write one element by scattering across all buffers. + /// Takes &self for GPU concurrent write (disjoint indices). + unsafe fn set(&self, i: usize, v: Self::Item); +} + +pub trait SoAAllocator { + fn alloc_raw(&self, layout: core::alloc::Layout) -> *mut u8; +} + +pub fn alloc_soa_buffer(n: u32, alloc: &dyn SoAAllocator) -> SoABuffer { + if n == 0 { + return SoABuffer::null(); + } + let layout = core::alloc::Layout::array::(n as usize).unwrap(); + let raw = alloc.alloc_raw(layout); + SoABuffer { + ptr: Ptr::from_raw(raw as *mut T), + capacity: n, + } +} + +#[repr(C)] +pub struct WorkQueue { + pub storage: S, + pub count: AtomicU32, + pub capacity: u32, +} + +impl WorkQueue { + pub fn new(storage: S, capacity: u32) -> Self { + Self { + storage, + count: AtomicU32::new(0), + capacity, + } + } + + /// Number of items currently in the queue. + #[inline(always)] + pub fn size(&self) -> u32 { + self.count.load() + } + + /// Reset the queue to empty. Call from host between kernel launches. + #[inline(always)] + pub fn reset(&self) { + self.count.store(0); + } + + /// Push an item, returning its index. Returns None if the queue is full. + /// The atomic increment guarantees each thread gets a unique slot. + #[inline(always)] + pub fn push(&self, item: S::Item) -> Option { + let slot = self.count.fetch_add(1); + if slot >= self.capacity { + // Queue overflow — this shouldn't happen if capacity is + // sized correctly. In debug builds we want to know about it. + debug_assert!(false, "WorkQueue overflow: {} >= {}", slot, self.capacity); + return None; + } + unsafe { + self.storage.set(slot as usize, item); + } + Some(slot) + } + + /// Read an item at the given index. Used by consumer kernels. + #[inline(always)] + pub unsafe fn get(&self, i: usize) -> S::Item { + debug_assert!( + (i as u32) < self.size(), + "WorkQueue::get out of bounds: {} >= {}", + i, + self.size() + ); + self.storage.get(i) + } } diff --git a/src/lib.rs b/src/lib.rs index e2cad71..697e2ee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,4 +14,4 @@ pub mod utils; pub mod wavefront; pub use utils::{Arena, FileLoc, ParameterDictionary, Upload, ArenaUpload}; -pub const MAX_TAGS = 16; +pub const MAX_TAGS: u32 = 16; diff --git a/src/utils/arena.rs b/src/utils/arena.rs index af1a49a..32c5677 100644 --- a/src/utils/arena.rs +++ b/src/utils/arena.rs @@ -1,6 +1,7 @@ use crate::utils::backend::GpuAllocator; use crate::utils::mipmap::MIPMap; use parking_lot::Mutex; +use shared::utils::soa::SoAAllocator; use shared::Ptr; use std::alloc::Layout; use std::collections::HashMap; @@ -161,6 +162,11 @@ impl Arena { (Ptr::from_raw(ptr), len) } + pub fn alloc_layout(&self, layout: Layout) -> *mut u8 { + let mut bump = self.bump.lock(); + bump.alloc_layout(layout) + } + pub fn get_texture_object(&self, mipmap: &Arc) -> u64 { let key = Arc::as_ptr(mipmap) as usize; let mut cache = self.texture_cache.lock(); @@ -178,3 +184,9 @@ impl Default for Arena { Self::new(A::default()) } } + +impl SoAAllocator for Arena { + fn alloc_raw(&self, layout: core::alloc::Layout) -> *mut u8 { + self.alloc_layout(layout) + } +} diff --git a/src/wavefront/aggregate.rs b/src/wavefront/aggregate.rs deleted file mode 100644 index 0605ece..0000000 --- a/src/wavefront/aggregate.rs +++ /dev/null @@ -1,11 +0,0 @@ -use shared::core::geometry::Bounds3f; -use super::{RayQueue, EscapedRayQueue, HitAreaLightQueue, MaterialEvalQueue, MediumSampleQueue, SubsurfaceScatterQueue}; - -#[derive(Clone, Debug)] -pub trait WavefrontAggregate { - fn bounds(&self) -> Bounds3f; - fn intersect_closest(max_rays: usize, ray_q: &mut RayQueue, hit_area_light_q: &mut HitAreaLightQueue, basic_mlt_q: &mut MaterialEvalQueue, universal_mtl_q: &mut MaterialEvalQueue, medium_sample_q: &mut MediumSampleQueue); - fn intersect_shadow(max_rays: usize, shadow_ray_q: &mut ShadowRayQueue, pixel_sample_state: &mut SOA); - fn intersect_shadow_tr(max_rays: usize, shadow_ray_q: &mut ShadowRayQueue, pixel_sample_state: &mut SOA); - fn intersect_one_random(max_rays: usize, subsurface_scatte_q: &mut SubsurfaceScatterQueue); -} diff --git a/src/wavefront/integrator.rs b/src/wavefront/integrator.rs index 8cba5ee..9adf61c 100644 --- a/src/wavefront/integrator.rs +++ b/src/wavefront/integrator.rs @@ -1,38 +1,38 @@ -use crate::MAX_TAGS; -use shared::{Ptr, GVec}; -use shared::core::film::Film; -use shared::core::color::RGB; -use shared::core::filter::Filter; -use shared::core::light::Light; -use shared::core::sampler::Filter; -use shared::wavefront::{WavefrontAggregate, RayQueue, MediumSampleQueue, EscapedRayQueue, HitAreaLightQueue, MaterialEvalQueue, ShadowRayQueue, GetBSSRDFAndProbeRayQueue, SubsurfaceScatterQueue}; - -pub struct WavefrontPathIntegrator { - pub init_visible_surface: bool, - pub have_subsurface: bool, - pub have_media: bool, - pub have_basic_eval_material: [bool; MAX_TAGS + 1], - pub have_universal_eval_material: [bool; MAX_TAGS + 1], - pub filter: Filter, - pub film: Film, - pub sampler: Sampler, - pub camera: Camera, - pub infinite_lights: GVec, - pub max_depth: usize, - pub sampler_per_pixel: usize, - pub regularize: bool, - pub scanlines_per_pixel: usize, - pub max_queue_size: usize, - pub medium_sample_queue: Ptr, - pub medium_scatter_queue: Ptr, - pub escaped_ray_queue: Ptr, - pub hit_area_light_queue: Ptr, - pub basic_eval_material_queue: Ptr, - pub universal_eval_material_queue: Ptr, - pub shadow_ray_queue: Ptr, - pub bssrdf_eval_queue: PTr, - pub subsurface_scatter_queue: Ptr, - pub display_rgb: Ptr, - pub display_rgb_host: Ptr, +// pub max_depth: usize, +// pub sampler_per_pixel: usize, +// pub regularize: bool, +// pub scanlines_per_pixel: usize, +// pub max_queue_size: usize, +// pub medium_sample_queue: Ptr, +// pub medium_scatter_queue: Ptr, +// pub escaped_ray_queue: Ptr, +// pub hit_area_light_queue: Ptr, +// pub basic_eval_material_queue: Ptr, +// pub universal_eval_material_queue: Ptr, +// pub shadow_ray_queue: Ptr, +// pub bssrdf_eval_queue: PTr, +// pub subsurface_scatter_queue: Ptr, +// pub display_rgb: Ptr, +// pub display_rgb_host: Ptr, +// +// } diff --git a/src/wavefront/mod.rs b/src/wavefront/mod.rs index 41f5476..12b9be2 100644 --- a/src/wavefront/mod.rs +++ b/src/wavefront/mod.rs @@ -1,4 +1 @@ pub mod integrator; -pub mod aggregate; - -pub use aggregate::WavefrontAggregate;