From 3b3f9eb155e7a96f0215dad54fbe5c72c52c7ca8 Mon Sep 17 00:00:00 2001 From: pingu Date: Sun, 18 Jan 2026 16:25:56 +0000 Subject: [PATCH] Working on fixing import errors, standardizing CPU/GPU container types --- src/core/medium.rs | 184 ++++++++++++++ src/filters/boxf.rs | 1 + src/filters/gaussian.rs | 31 +++ src/filters/lanczos.rs | 22 ++ src/filters/mitchell.rs | 24 ++ src/filters/mod.rs | 5 + src/filters/triangle.rs | 0 src/gpu/driver.rs | 320 +++++++++++++++++++++++ src/gpu/memory.rs | 534 +++++++++++++++++++++++++++++++++++++++ src/gpu/mod.rs | 5 + src/gpu/wavefront/mod.rs | 45 ++++ 11 files changed, 1171 insertions(+) create mode 100644 src/core/medium.rs create mode 100644 src/filters/boxf.rs create mode 100644 src/filters/gaussian.rs create mode 100644 src/filters/lanczos.rs create mode 100644 src/filters/mitchell.rs create mode 100644 src/filters/mod.rs create mode 100644 src/filters/triangle.rs create mode 100644 src/gpu/driver.rs create mode 100644 src/gpu/memory.rs create mode 100644 src/gpu/mod.rs create mode 100644 src/gpu/wavefront/mod.rs diff --git a/src/core/medium.rs b/src/core/medium.rs new file mode 100644 index 0000000..c3830a6 --- /dev/null +++ b/src/core/medium.rs @@ -0,0 +1,184 @@ +use crate::spectra::dense::DenselySampledSpectrumBuffer; +use shared::core::geometry::Bounds3f; +use shared::core::medium::{GridMedium, HomogeneousMedium, RGBGridMedium}; +use shared::spectra::{RGBIlluminantSpectrum, RGBUnboundedSpectrum}; +use shared::utils::Transform; +use shared::utils::containers::SampledGrid; +use shared::{Float, core::medium::MajorantGrid}; + +pub struct MajorantGridHost { + pub device: MajorantGrid, + voxels: Vec, +} + +impl MajorantGridHost { + pub fn new(bounds: Bounds3f, res: Point3i) -> Self { + let n = (res.x() * res.y() * res.z()) as usize; + let voxels = vec![0.0; n]; + + let device = MajorantGrid { + bounds, + res, + voxels: std::ptr::null_mut(), + n_voxels: n as u32, + }; + + Self { device, voxels } + } +} + +pub trait RGBGridMediumCreator { + fn new( + bounds: &Bounds3f, + render_from_medium: &Transform, + g: Float, + sigma_a_grid: SampledGrid, + sigma_s_grid: SampledGrid, + sigma_scale: Float, + le_grid: SampledGrid, + le_scale: Float, + ) -> Self; +} + +impl RGBGridMediumCreator for RGBGridMedium { + #[allow(clippy::too_many_arguments)] + fn new( + bounds: &Bounds3f, + render_from_medium: &Transform, + g: Float, + sigma_a_grid: SampledGrid, + sigma_s_grid: SampledGrid, + sigma_scale: Float, + le_grid: SampledGrid, + le_scale: Float, + ) -> Self { + let majorant_grid = MajorantGrid::new(*bounds, Point3i::new(16, 16, 16)); + for z in 0..majorant_grid.res.x() { + for y in 0..majorant_grid.res.y() { + for x in 0..majorant_grid.res.x() { + let bounds = majorant_grid.voxel_bounds(x, y, z); + let convert = |s: &RGBUnboundedSpectrum| s.max_value(); + let max_sigma_t = sigma_a_grid.max_value_convert(bounds, convert) + + sigma_s_grid.max_value_convert(bounds, convert); + majorant_grid.set(x, y, z, sigma_scale * max_sigma_t); + } + } + } + + Self { + bounds: *bounds, + render_from_medium: *render_from_medium, + le_grid, + le_scale, + phase: HGPhaseFunction::new(g), + sigma_a_grid, + sigma_s_grid, + sigma_scale, + majorant_grid, + } + } +} + +pub trait GridMediumCreator { + pub fn new( + bounds: &Bounds3f, + render_from_medium: &Transform, + sigma_a: &Spectrum, + sigma_s: &Spectrum, + sigma_scale: Float, + g: Float, + density_grid: SampledGrid, + temperature_grid: SampledGrid, + le: &Spectrum, + le_scale: SampledGrid, + ) -> Self; +} + +impl GridMediumCreator for GridMedium { + #[allow(clippy::too_many_arguments)] + fn new( + bounds: &Bounds3f, + render_from_medium: &Transform, + sigma_a: &Spectrum, + sigma_s: &Spectrum, + sigma_scale: Float, + g: Float, + density_grid: SampledGrid, + temperature_grid: SampledGrid, + le: &Spectrum, + le_scale: SampledGrid, + ) -> Self { + let mut sigma_a_spec = DenselySampledSpectrum::from_spectrum(sigma_a); + let mut sigma_s_spec = DenselySampledSpectrum::from_spectrum(sigma_s); + let le_spec = DenselySampledSpectrum::from_spectrum(le); + sigma_a_spec.scale(sigma_scale); + sigma_s_spec.scale(sigma_scale); + + let mut majorant_grid = MajorantGrid::new(*bounds, Point3i::new(16, 16, 16)); + let is_emissive = if temperature_grid.is_some() { + true + } else { + le_spec.max_value() > 0. + }; + + for z in 0..majorant_grid.res.z() { + for y in 0..majorant_grid.res.y() { + for x in 0..majorant_grid.res.x() { + let bounds = majorant_grid.voxel_bounds(x, y, z); + majorant_grid.set(x, y, z, density_grid.max_value(bounds)); + } + } + } + + Self { + bounds: *bounds, + render_from_medium: *render_from_medium, + sigma_a_spec, + sigma_s_spec, + density_grid, + phase: HGPhaseFunction::new(g), + temperature_grid, + le_spec, + le_scale, + is_emissive, + majorant_grid, + } + } +} + +pub trait HomogeneousMediumCreator { + fn new( + sigma_a: Spectrum, + sigma_s: Spectrum, + sigma_scale: Float, + le: Spectrum, + le_scale: Float, + g: Float, + ) -> Self; +} + +impl HomogeneousMediumCreator for HomogeneousMedium { + fn new( + sigma_a: Spectrum, + sigma_s: Spectrum, + sigma_scale: Float, + le: Spectrum, + le_scale: Float, + g: Float, + ) -> Self { + let mut sigma_a_spec = DenselySampledSpectrumBuffer::from_spectrum(&sigma_a); + let mut sigma_s_spec = DenselySampledSpectrumBuffer::from_spectrum(&sigma_s); + let mut le_spec = DenselySampledSpectrum::from_spectrum(&le); + + sigma_a_spec.scale(sigma_scale); + sigma_s_spec.scale(sigma_scale); + le_spec.scale(le_scale); + + Self { + sigma_a_spec, + sigma_s_spec, + le_spec, + phase: HGPhaseFunction::new(g), + } + } +} diff --git a/src/filters/boxf.rs b/src/filters/boxf.rs new file mode 100644 index 0000000..2af62d1 --- /dev/null +++ b/src/filters/boxf.rs @@ -0,0 +1 @@ +pub trait BoxFilterCreator {} diff --git a/src/filters/gaussian.rs b/src/filters/gaussian.rs new file mode 100644 index 0000000..906ee52 --- /dev/null +++ b/src/filters/gaussian.rs @@ -0,0 +1,31 @@ +use crate::core::filter::CreateFilterSampler; +use shared::Float; +use shared::core::filter::FilterSampler; +use shared::core::geometry::Vector2f; +use shared::filters::GaussianFilter; +use shared::utils::math::gaussian; + +pub trait GaussianFilterCreator { + fn new(radius: Vector2f, sigma: Float) -> Self; +} + +impl GaussianFilterCreator for GaussianFilter { + fn new(radius: Vector2f, sigma: Float) -> Self { + let exp_x = gaussian(radius.x(), 0., sigma); + let exp_y = gaussian(radius.y(), 0., sigma); + + let sampler = FilterSampler::new(radius, move |p: Point2f| { + let gx = (gaussian(p.x(), 0., sigma) - exp_x).max(0.0); + let gy = (gaussian(p.y(), 0., sigma) - exp_y).max(0.0); + gx * gy + }); + + Self { + radius, + sigma, + exp_x: gaussian(radius.x(), 0., sigma), + exp_y: gaussian(radius.y(), 0., sigma), + sampler, + } + } +} diff --git a/src/filters/lanczos.rs b/src/filters/lanczos.rs new file mode 100644 index 0000000..cc73f78 --- /dev/null +++ b/src/filters/lanczos.rs @@ -0,0 +1,22 @@ +use shared::Float; +use shared::core::geometry::{Point2f, Vector2f}; +use shared::filters::LanczosSincFilter; +use shared::utils::math::windowed_sinc; + +pub trait LanczosFilterCreator { + fn new(radius: Vector2f, tau: Float) -> Self; +} + +impl LanczosFilterCreator for LanczosSincFilter { + fn new(radius: Vector2f, tau: Float) -> Self { + let sampler = FilterSampler::new(radius, move |p: Point2f| { + windowed_sinc(p.x(), radius.x(), tau) * windowed_sinc(p.y(), radius.y(), tau) + }); + + Self { + radius, + tau, + sampler, + } + } +} diff --git a/src/filters/mitchell.rs b/src/filters/mitchell.rs new file mode 100644 index 0000000..a828ee1 --- /dev/null +++ b/src/filters/mitchell.rs @@ -0,0 +1,24 @@ +use shared::Float; +use shared::core::geometry::{Point2f, Vector2f}; +use shared::filters::MitchellFilter; + +pub trait MitchellFilterCreator { + fn new(radius: Vector2f, b: Float, c: Float) -> Self; +} + +impl MitchellFilterCreator for MitchellFilter { + fn new(radius: Vector2f, b: Float, c: Float) -> Self { + let sampler = FilterSampler::new(radius, move |p: Point2f| { + let nx = 2.0 * p.x() / radius.x(); + let ny = 2.0 * p.y() / radius.y(); + Self::mitchell_1d_eval(b, c, nx) * Self::mitchell_1d_eval(b, c, ny) + }); + + Self { + radius, + b, + c, + sampler, + } + } +} diff --git a/src/filters/mod.rs b/src/filters/mod.rs new file mode 100644 index 0000000..f36bc92 --- /dev/null +++ b/src/filters/mod.rs @@ -0,0 +1,5 @@ +pub mod boxf; +pub mod gaussian; +pub mod lanczos; +pub mod mitchell; +pub mod triangle; diff --git a/src/filters/triangle.rs b/src/filters/triangle.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/gpu/driver.rs b/src/gpu/driver.rs new file mode 100644 index 0000000..db0c5bd --- /dev/null +++ b/src/gpu/driver.rs @@ -0,0 +1,320 @@ +use bytemuck::{Pod, Zeroable}; +use cust::context::{CacheConfig, CurrentContext, ResourceLimit}; +use cust::device::DeviceAttribute; +use cust::memory::DeviceCopy; +use cust::prelude::*; +use lazy_static::lazy_static; +use parking_lot::Mutex; +use std::error::Error; +use std::sync::Arc; + +use shared::Float; +use shared::core::geometry::{Normal, Point, Vector}; +use shared::core::options::get_options; +use shared::spectra::{SampledSpectrum, SampledWavelengths}; +use shared::utils::interval::Interval; + +#[macro_export] +macro_rules! impl_gpu_traits { + ($name:ty) => { + unsafe impl DeviceCopy for $name {} + unsafe impl Zeroable for $name {} + unsafe impl Pod for $name {} + }; +} + +#[macro_export] +macro_rules! impl_math_gpu_traits { + ($Struct:ident) => { + #[cfg(feature = "use_gpu")] + unsafe impl DeviceCopy for $Struct where T: DeviceCopy + Copy {} + + unsafe impl Zeroable for $Struct where T: Zeroable {} + + unsafe impl Pod for $Struct where T: Pod {} + }; +} + +impl_math_gpu_traits!(Vector); +impl_math_gpu_traits!(Normal); +impl_math_gpu_traits!(Point); +impl_gpu_traits!(Interval); +impl_gpu_traits!(Float4); +impl_gpu_traits!(SampledSpectrum); +impl_gpu_traits!(SampledWavelengths); + +#[repr(C, align(16))] +#[derive(Clone, Copy, Debug, Default, PartialEq)] +pub struct Float4 { + pub v: [f32; 4], +} +pub type Vec4 = Vector; + +impl From for Float4 { + #[inline] + fn from(vec: Vector) -> Self { + Self { v: vec.0 } + } +} + +impl From for Vec4 { + #[inline] + fn from(storage: Float4) -> Self { + Vector(storage.v) + } +} + +struct KernelStats { + description: String, + num_launches: usize, + sum_ms: f32, + min_ms: f32, + max_ms: f32, +} + +impl KernelStats { + fn new(description: &str) -> Self { + Self { + description: description.to_string(), + num_launches: 0, + sum_ms: 0.0, + min_ms: 0.0, + max_ms: 0.0, + } + } +} + +struct ProfilerEvent { + start: Event, + stop: Event, + active: bool, + stats: Option>>, +} + +impl ProfilerEvent { + fn new() -> Result { + let start = Event::new(EventFlags::DEFAULT)?; + let stop = Event::new(EventFlags::DEFAULT)?; + Ok(Self { + start, + stop, + active: false, + stats: None, + }) + } + + fn sync(&mut self) { + if !self.active { + return; + } + + if self.stop.synchronize().is_ok() { + match self.stop.elapsed_time_f32(&self.start) { + Ok(ms) => { + if let Some(stats_arc) = &self.stats { + let mut stats = stats_arc.lock(); + stats.num_launches += 1; + if stats.num_launches == 1 { + stats.sum_ms = ms; + stats.min_ms = ms; + stats.max_ms = ms; + } else { + stats.sum_ms += ms; + stats.min_ms = stats.min_ms.min(ms); + stats.max_ms = stats.max_ms.max(ms); + } + } + } + Err(e) => log::error!("Failed to get elapsed time: {:?}", e), + } + } + self.active = false; + } +} + +struct Profiler { + kernel_stats: Vec>>, + event_pool: Vec, + pool_offset: usize, +} + +impl Profiler { + fn new() -> Self { + Self { + kernel_stats: Vec::new(), + event_pool: Vec::new(), + pool_offset: 0, + } + } + + fn prepare<'a>(&'a mut self, description: &str) -> &'a mut ProfilerEvent { + if self.event_pool.is_empty() { + for _ in 0..128 { + if let Ok(e) = ProfilerEvent::new() { + self.event_pool.push(e); + } + } + } + + if self.pool_offset >= self.event_pool.len() { + self.pool_offset = 0; + } + + let idx = self.pool_offset; + self.pool_offset += 1; + + let pe = &mut self.event_pool[idx]; + + if pe.active { + pe.sync(); + } + + pe.active = true; + pe.stats = None; + + let mut found = None; + for s in &self.kernel_stats { + if s.lock().description == description { + found = Some(s.clone()); + break; + } + } + + if found.is_none() { + let new_stats = Arc::new(Mutex::new(KernelStats::new(description))); + self.kernel_stats.push(new_stats.clone()); + found = Some(new_stats); + } + + pe.stats = found; + pe + } +} + +pub struct GpuState { + pub context: Context, + pub stream: Stream, + profiler: Profiler, +} + +impl GpuState { + fn init(device_index: u32) -> Result> { + match cust::init(CudaFlags::empty()) { + Ok(_) => {} + Err(cust::error::CudaError::SharedObjectInitFailed) => {} + Err(e) => return Err(Box::new(e)), + } + + let device = Device::get_device(device_index)?; + let name = device.name().unwrap_or_else(|_| "Unknown".into()); + + log::info!("Selected GPU: {}", name); + + let has_unified = device + .get_attribute(DeviceAttribute::UnifiedAddressing) + .unwrap_or(0); + if has_unified == 0 { + panic!("Selected GPU does not support unified addressing."); + } + + let context = Context::new(device)?; + + CurrentContext::set_resource_limit(ResourceLimit::StackSize, 8192)?; + CurrentContext::set_resource_limit(ResourceLimit::PrintfFifoSize, 32 * 1024 * 1024)?; + CurrentContext::set_cache_config(CacheConfig::PreferL1)?; + + let stream = Stream::new(StreamFlags::DEFAULT, None)?; + + Ok(Self { + context, + stream, + profiler: Profiler::new(), + }) + } +} + +lazy_static! { + pub static ref GPU_STATE: Mutex> = Mutex::new(None); +} + +pub fn gpu_init() { + if !get_options().use_gpu { + return; + } + + let device_id = get_options().gpu_device.unwrap_or(0); + log::info!("Initializing GPU Device {}", device_id); + + match GpuState::init(device_id) { + Ok(state) => { + #[cfg(feature = "use_nvtx")] + nvtx::name_thread("MAIN_THREAD"); + *GPU_STATE.lock() = Some(state); + } + Err(e) => { + panic!("Failed to initialize GPU: {:?}", e); + } + } +} + +pub fn gpu_thread_init() { + if let Some(state) = GPU_STATE.lock().as_ref() { + if let Err(e) = CurrentContext::set_current(&state.context) { + log::error!("Failed to set CUDA context for thread: {:?}", e); + } + } +} + +const SANITY_PTX: &str = r#" +.version 6.5 +.target sm_30 +.address_size 64 +.visible .entry scale_array_kernel(.param .u64 ptr, .param .u32 n, .param .f32 scale) { + .reg .u64 %ptr; .reg .u32 %n; .reg .f32 %scale; + .reg .u32 %tid; .reg .u32 %ntid; .reg .u32 %ctid; .reg .u32 %idx; + .reg .pred %p; .reg .u64 %addr; .reg .f32 %val; + + ld.param.u64 %ptr, [ptr]; + ld.param.u32 %n, [n]; + ld.param.f32 %scale, [scale]; + + mov.u32 %tid, %tid.x; + mov.u32 %ntid, %ntid.x; + mov.u32 %ctid, %ctaid.x; + mad.lo.s32 %idx, %ctid, %ntid, %tid; + + setp.ge.u32 %p, %idx, %n; + @%p ret; + + mul.wide.u32 %addr, %idx, 4; + add.u64 %addr, %ptr, %addr; + ld.global.f32 %val, [%addr]; + mul.f32 %val, %val, %scale; + st.global.f32 [%addr], %val; +} +"#; + +pub fn launch_scale_kernel(ptr: *mut f32, len: usize, scale: f32) -> Result<(), Box> { + // Note: quick_init works best for isolated tests. + // If the main engine runs, this might conflict, but for the sanity test it is correct. + let _ctx = cust::quick_init()?; + + let module = Module::from_str(SANITY_PTX)?; + let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?; + let mut kernel = module.get_function("scale_array_kernel")?; + + let block_size = 256; + let grid_size = (len as u32 + block_size - 1) / block_size; + + unsafe { + launch!( + kernel<<>>( + ptr as u64, + len as u32, + scale + ) + )?; + } + stream.synchronize()?; + Ok(()) +} diff --git a/src/gpu/memory.rs b/src/gpu/memory.rs new file mode 100644 index 0000000..5661f4a --- /dev/null +++ b/src/gpu/memory.rs @@ -0,0 +1,534 @@ +#![allow(clippy::too_many_arguments)] +use super::Float4; +use crate::Float; +use crate::core::geometry::{Normal3f, Point2f, Point2i, Point3f, Point3fi, Ray, Vector3f}; +use crate::lights::LightSampleContext; +use crate::spectra::{SampledSpectrum, SampledWavelengths}; +// use cust::memory::{CopyDestination, DeviceMemory}; +// use cust::prelude::*; + +#[macro_export] +macro_rules! soa_struct { + ( + $(#[$outer:meta])* + pub struct $name:ident { + $( + pub $field:ident : $type:ty + ),* $(,)? + } +) => { + #[cfg(feature = "use_gpu")] + $(#[$outer])* + pub struct $name { + capacity: u32, + pub count: cust::memory::DeviceBuffer, + $( + pub $field: cust::memory::DeviceBuffer<$type>, + )* + } + + #[cfg(feature = "use_gpu")] + impl $name { + pub fn new(capacity: usize) -> cust::error::CudaResult { + use cust::memory::DeviceBuffer; + Ok(Self { + capacity: capacity as u32, + count: DeviceBuffer::zeroed(1)?, + $( + $field: DeviceBuffer::zeroed(capacity)?, + )* + }) + } + + pub fn len(&self) -> cust::error::CudaResult { + let mut host_count = [0u32; 1]; + self.count.copy_to(&mut host_count)?; + Ok(host_count[0]) + } + + pub fn reset(&mut self) -> cust::error::CudaResult<()> { + self.count.copy_from(&[0]) + } + + // Generate the View name + pub fn as_view(&mut self) -> paste::paste! { [<$name View>] } { + paste::paste! { + [<$name View>] { + capacity: self.capacity, + count: self.count.as_device_ptr().as_mut_ptr(), + $( + $field: self.$field.as_device_ptr().as_raw() as *mut $type, + )* + } + } + } + } + + paste::paste! { + #[repr(C)] + #[derive(Clone, Copy)] + pub struct [<$name View>] { + pub capacity: u32, + pub count: *mut u32, + $( + pub $field: *mut $type, + )* + } + + unsafe impl cust::memory::DeviceCopy for [<$name View>] {} + + impl [<$name View>] { + // The raw push that fills every field + #[cfg(feature = "use_gpu")] + pub unsafe fn push(&self, $( $field : $type ),* ) -> Option { + use core::sync::atomic::{AtomicU32, Ordering}; + + let index = unsafe { + let counter_ptr = self.count as *mut AtomicU32; + (*counter_ptr).fetch_add(1, Ordering::Relaxed) + }; + + if index >= self.capacity { + return None; + } + + unsafe { + $( + *self.$field.add(index as usize) = $field; + )* + } + + Some(index) + } + + #[cfg(feature = "use_gpu")] + pub unsafe fn size(&self) -> u32 { + use core::sync::atomic::{AtomicU32, Ordering}; + unsafe { + (*(self.count as *const AtomicU32)).load(Ordering::Relaxed) + } + } + + $( + #[cfg(feature = "use_gpu")] + pub fn [<$field _ptr>](&self) -> *mut $type { + self.$field + } + )* + } + } + }; +} + +#[repr(C)] +#[derive(Clone, Copy, Default)] +pub struct RaySamplesDirect { + pub u: Point2f, + pub uc: Float, +} + +#[repr(C)] +#[derive(Clone, Copy, Default)] +pub struct RaySamplesIndirect { + pub uc: Float, + pub rr: Float, + pub u: Point2f, +} + +#[repr(C)] +#[derive(Clone, Copy, Default)] +pub struct RaySamplesSubsurface { + pub uc: Float, + pub u: Point2f, +} + +#[repr(C)] +#[derive(Clone, Copy, Default)] +pub struct RaySamples { + pub direct: RaySamplesDirect, + pub indirect: RaySamplesIndirect, + pub have_subsurface: bool, + pub subsurface: RaySamplesSubsurface, +} + +soa_struct! { + pub struct RayQueue { + pub ray_o: Point3f, + pub ray_d: Vector3f, + + pub depth: i32, + pub lambda: SampledWavelengths, + pub pixel_index: u32, + + pub beta: SampledSpectrum, + pub r_u: SampledSpectrum, + pub r_l: SampledSpectrum, + + pub ctx_pi: Point3f, + pub ctx_n: Normal3f, + pub ctx_ns: Normal3f, + + pub eta_scale: Float, + pub specular_bounce: u32, + pub any_non_specular_bounces: u32, + } +} + +soa_struct! { + pub struct PixelSampleStateStorage { + pub p_pixel: Point2i, + pub l: SampledSpectrum, + pub lambda: SampledWavelengths, + pub filter_weight: Float, + pub visible_surface: u32, + pub camera_ray_weight: SampledSpectrum, + + pub rs_direct_packed: Float4, + pub rs_indirect_packed: Float4, + pub rs_subsurface_packed: Float4, + } +} + +soa_struct! { + pub struct EscapedRayQueue { + pub ray_o: Point3f, + pub ray_d: Vector3f, + pub depth: i32, + pub lambda: SampledWavelengths, + pub pixel_index: u32, + pub beta: SampledSpectrum, + pub specular_bounce: u32, + pub r_u: SampledSpectrum, + pub r_l: SampledSpectrum, + pub ctx_pi: Point3f, + pub ctx_n: Normal3f, + pub ctx_ns: Normal3f, + } +} + +soa_struct! { + pub struct HitAreaLightQueue { + pub area_light_id: u32, // Light ID + pub p: Point3f, + pub n: Normal3f, + pub uv: Point2f, + pub wo: Vector3f, + pub lambda: SampledWavelengths, + pub depth: i32, + pub beta: SampledSpectrum, + pub r_u: SampledSpectrum, + pub r_l: SampledSpectrum, + pub ctx_pi: Point3f, + pub ctx_n: Normal3f, + pub ctx_ns: Normal3f, + pub specular_bounce: u32, + pub pixel_index: u32, + } +} + +soa_struct! { + pub struct ShadowRayQueue { + pub ray_o: Point3f, + pub ray_d: Vector3f, + pub t_max: Float, + pub lambda: SampledWavelengths, + pub ld: SampledSpectrum, + pub r_u: SampledSpectrum, + pub r_l: SampledSpectrum, + pub pixel_index: u32, + } +} + +soa_struct! { + pub struct GetBSSRDFAndProbeRayQueue { + pub material_id: u32, + pub lambda: SampledWavelengths, + pub beta: SampledSpectrum, + pub r_u: SampledSpectrum, + pub p: Point3f, + pub wo: Vector3f, + pub n: Normal3f, + pub ns: Normal3f, + pub dpdus: Vector3f, + pub uv: Point2f, + pub depth: i32, + pub mi_inside: u32, + pub mi_outside: u32, + pub eta_scale: Float, + pub pixel_index: u32, + } +} + +soa_struct! { + pub struct SubsurfaceScatterQueue { + pub p0: Point3f, + pub p1: Point3f, + pub depth: i32, + pub material_id: u32, + pub lambda: SampledWavelengths, + pub beta: SampledSpectrum, + pub r_u: SampledSpectrum, + pub mi_inside: u32, + pub mi_outside: u32, + pub eta_scale: Float, + pub pixel_index: u32, + } +} + +soa_struct! { + pub struct MediumSampleQueue { + pub ray_o: Point3f, + pub ray_d: Vector3f, + pub t_max: Float, + pub lambda: SampledWavelengths, + pub beta: SampledSpectrum, + pub r_u: SampledSpectrum, + pub r_l: SampledSpectrum, + pub pixel_index: u32, + + pub ctx_pi: Point3f, + pub ctx_n: Normal3f, + pub ctx_ns: Normal3f, + + pub specular_bounce: u32, + pub any_non_specular_bounces: u32, + pub eta_scale: Float, + + pub area_light_id: u32, + pub pi: Point3fi, + pub n: Normal3f, + pub dpdu: Vector3f, + pub dpdv: Vector3f, + pub wo: Vector3f, + pub uv: Point2f, + pub material_id: u32, + pub ns: Normal3f, + pub dpdus: Vector3f, + pub dpdvs: Vector3f, + pub dndus: Normal3f, + pub dndvs: Normal3f, + pub face_index: i32, + pub mi_inside: u32, + pub mi_outside: u32, + } +} + +soa_struct! { + pub struct MaterialEvalQueue { + pub material_id: u32, + pub pi: Point3fi, + pub n: Normal3f, + pub dpdu: Vector3f, + pub dpdv: Vector3f, + pub time: Float, + pub depth: i32, + pub ns: Normal3f, + pub dpdus: Vector3f, + pub dpdvs: Vector3f, + pub dndus: Normal3f, + pub dndvs: Normal3f, + pub uv: Point2f, + pub face_index: i32, + pub lambda: SampledWavelengths, + pub pixel_index: u32, + pub any_non_specular_bounces: u32, + pub wo: Vector3f, + pub beta: SampledSpectrum, + pub r_u: SampledSpectrum, + pub eta_scale: Float, + pub mi_inside: u32, + pub mi_outside: u32, + } +} + +soa_struct! { + pub struct MediumScatterQueue { + pub p: Point3f, + pub depth: usize, + pub lambda: SampledWavelengths, + pub beta: SampledSpectrum, + pub r_u: SampledSpectrum, + pub wo: Vector3f, + pub time: Float, + pub eta_scale: Float, + pub pixel_index: usize, + + // ID + pub phase_function: u32, + pub medium: u32, + } +} + +#[repr(C)] +#[derive(Clone, Copy)] +pub struct RayWorkItem { + pub ray: Ray, + pub depth: i32, + pub lambda: SampledWavelengths, + pub pixel_index: u32, + pub beta: SampledSpectrum, + pub r_u: SampledSpectrum, + pub r_l: SampledSpectrum, + pub prev_intr_ctx: LightSampleContext, + pub eta_scale: Float, + pub specular_bounce: bool, + pub any_non_specular_bounces: bool, +} + +#[repr(C)] +#[derive(Clone, Copy)] +pub struct EscapedRayWorkItem { + pub ray_o: Point3f, + pub ray_d: Vector3f, + pub depth: i32, + pub lambda: SampledWavelengths, + pub pixel_index: u32, + pub beta: SampledSpectrum, + pub specular_bounce: bool, + pub r_u: SampledSpectrum, + pub r_l: SampledSpectrum, + pub prev_intr_ctx: LightSampleContext, +} + +#[repr(C)] +#[derive(Clone, Copy)] +pub struct ShadowRayWorkItem { + pub ray: Ray, + pub t_max: Float, + pub lambda: SampledWavelengths, + pub ld: SampledSpectrum, + pub r_u: SampledSpectrum, + pub r_l: SampledSpectrum, + pub pixel_index: u32, +} + +impl RayQueueView { + #[cfg(feature = "use_gpu")] + pub unsafe fn push_work_item(&self, item: RayWorkItem) -> Option { + unsafe { + self.push( + item.ray.o, + item.ray.d, + item.depth, + item.lambda, + item.pixel_index, + item.beta, + item.r_u, + item.r_l, + item.prev_intr_ctx.pi.into(), + item.prev_intr_ctx.n, + item.prev_intr_ctx.ns, + item.eta_scale, + if item.specular_bounce { 1 } else { 0 }, + if item.any_non_specular_bounces { 1 } else { 0 }, + ) + } + } +} + +impl EscapedRayQueueView { + #[cfg(feature = "use_gpu")] + pub unsafe fn push_work_item(&self, r: &RayWorkItem) -> Option { + unsafe { + self.push( + r.ray.o, + r.ray.d, + r.depth, + r.lambda, + r.pixel_index, + r.beta, + if r.specular_bounce { 1 } else { 0 }, + r.r_u, + r.r_l, + r.prev_intr_ctx.pi.into(), + r.prev_intr_ctx.n, + r.prev_intr_ctx.ns, + ) + } + } +} + +impl PixelSampleStateStorageView { + #[cfg(feature = "use_gpu")] + pub unsafe fn get_samples(&self, index: u32) -> RaySamples { + let i = index as usize; + + let (dir, ind, ss) = unsafe { + ( + *self.rs_direct_packed.add(i), + *self.rs_indirect_packed.add(i), + *self.rs_subsurface_packed.add(i), + ) + }; + + let direct_u = Point2f::new(dir.v[0], dir.v[1]); + let direct_uc = dir.v[2]; + let flags = dir.v[3] as i32; + let have_subsurface = (flags & 1) != 0; + + let indirect_uc = ind.v[0]; + let indirect_rr = ind.v[1]; + let indirect_u = Point2f::new(ind.v[2], ind.v[3]); + + let subsurface_uc = ss.v[0]; + let subsurface_u = Point2f::new(ss.v[1], ss.v[2]); + + RaySamples { + direct: RaySamplesDirect { + u: direct_u, + uc: direct_uc, + }, + indirect: RaySamplesIndirect { + uc: indirect_uc, + rr: indirect_rr, + u: indirect_u, + }, + have_subsurface, + subsurface: RaySamplesSubsurface { + uc: subsurface_uc, + u: subsurface_u, + }, + } + } + + #[cfg(feature = "use_gpu")] + pub unsafe fn set_samples(&self, index: u32, rs: RaySamples) { + if index >= self.capacity { + return; + } + let i = index as usize; + + let flags = if rs.have_subsurface { 1.0 } else { 0.0 }; + let dir = Float4 { + v: [rs.direct.u.0[0], rs.direct.u.0[1], rs.direct.uc, flags], + }; + + let ind = Float4 { + v: [ + rs.indirect.uc, + rs.indirect.rr, + rs.indirect.u.0[0], + rs.indirect.u.0[1], + ], + }; + + unsafe { + *self.rs_direct_packed.add(i) = dir; + *self.rs_indirect_packed.add(i) = ind; + } + + if rs.have_subsurface { + let ss = Float4 { + v: [ + rs.subsurface.uc, + rs.subsurface.u.0[0], + rs.subsurface.u.0[1], + 0.0, + ], + }; + unsafe { + *self.rs_subsurface_packed.add(i) = ss; + } + } + } +} diff --git a/src/gpu/mod.rs b/src/gpu/mod.rs new file mode 100644 index 0000000..216781b --- /dev/null +++ b/src/gpu/mod.rs @@ -0,0 +1,5 @@ +pub mod driver; +pub mod memory; +pub mod wavefront; + +pub use driver::launch_scale_kernel; diff --git a/src/gpu/wavefront/mod.rs b/src/gpu/wavefront/mod.rs new file mode 100644 index 0000000..065f6b8 --- /dev/null +++ b/src/gpu/wavefront/mod.rs @@ -0,0 +1,45 @@ +// use crate::core::scene::BasicScene; +use crate::{ + EscapedRayQueue, GetBSSRDFAndProbeRayQueue, HitAreaLightQueue, MaterialEvalQueue, + MediumSampleQueue, MediumScatterQueue, PixelSampleStateStorage, RayQueue, ShadowRayQueue, + SubsurfaceScatterQueue, +}; +use shared::core::camera::Camera; +use shared::core::film::Film; +use shared::core::filter::Filter; +use shared::core::light::Light; +use shared::core::sampler::Sampler; +use shared::lights::LightSampler; +use std::sync::Arc; + +pub struct WavefrontPathIntegrator { + pub film: Film, + pub filter: Filter, + pub sampler: Sampler, + pub camera: Arc, + pub light_sampler: LightSampler, + pub infinite_lights: Option>>, + pub max_depth: i32, + pub samples_per_pixel: i32, + pub regularize: bool, + pub scanlines_per_pixel: i32, + pub max_queue_size: i32, + pub pixel_sample_state: PixelSampleStateStorage, + pub ray_queue: [RayQueue; 2], + pub hit_area_light_queue: HitAreaLightQueue, + pub shadow_ray_queue: ShadowRayQueue, + pub escaped_ray_queue: Option, + pub basic_material_queue: Option, + pub universal_material_queue: Option, + pub medium_sample_queue: Option, + pub medium_scatter_queue: Option, + pub bssrf_queue: Option, + pub subsurface_queue: Option, +} + +#[cfg(feature = "use_gpu")] +impl WavefrontPathIntegrator { + pub fn new(scene: BasicScene) -> Self { + todo!() + } +}