Working on fixing import errors, standardizing CPU/GPU container types

This commit is contained in:
pingu 2026-01-18 16:25:56 +00:00
parent c412b6d668
commit 3b3f9eb155
11 changed files with 1171 additions and 0 deletions

184
src/core/medium.rs Normal file
View file

@ -0,0 +1,184 @@
use crate::spectra::dense::DenselySampledSpectrumBuffer;
use shared::core::geometry::Bounds3f;
use shared::core::medium::{GridMedium, HomogeneousMedium, RGBGridMedium};
use shared::spectra::{RGBIlluminantSpectrum, RGBUnboundedSpectrum};
use shared::utils::Transform;
use shared::utils::containers::SampledGrid;
use shared::{Float, core::medium::MajorantGrid};
pub struct MajorantGridHost {
pub device: MajorantGrid,
voxels: Vec<Float>,
}
impl MajorantGridHost {
pub fn new(bounds: Bounds3f, res: Point3i) -> Self {
let n = (res.x() * res.y() * res.z()) as usize;
let voxels = vec![0.0; n];
let device = MajorantGrid {
bounds,
res,
voxels: std::ptr::null_mut(),
n_voxels: n as u32,
};
Self { device, voxels }
}
}
pub trait RGBGridMediumCreator {
fn new(
bounds: &Bounds3f,
render_from_medium: &Transform,
g: Float,
sigma_a_grid: SampledGrid<RGBUnboundedSpectrum>,
sigma_s_grid: SampledGrid<RGBUnboundedSpectrum>,
sigma_scale: Float,
le_grid: SampledGrid<RGBIlluminantSpectrum>,
le_scale: Float,
) -> Self;
}
impl RGBGridMediumCreator for RGBGridMedium {
#[allow(clippy::too_many_arguments)]
fn new(
bounds: &Bounds3f,
render_from_medium: &Transform,
g: Float,
sigma_a_grid: SampledGrid<RGBUnboundedSpectrum>,
sigma_s_grid: SampledGrid<RGBUnboundedSpectrum>,
sigma_scale: Float,
le_grid: SampledGrid<RGBIlluminantSpectrum>,
le_scale: Float,
) -> Self {
let majorant_grid = MajorantGrid::new(*bounds, Point3i::new(16, 16, 16));
for z in 0..majorant_grid.res.x() {
for y in 0..majorant_grid.res.y() {
for x in 0..majorant_grid.res.x() {
let bounds = majorant_grid.voxel_bounds(x, y, z);
let convert = |s: &RGBUnboundedSpectrum| s.max_value();
let max_sigma_t = sigma_a_grid.max_value_convert(bounds, convert)
+ sigma_s_grid.max_value_convert(bounds, convert);
majorant_grid.set(x, y, z, sigma_scale * max_sigma_t);
}
}
}
Self {
bounds: *bounds,
render_from_medium: *render_from_medium,
le_grid,
le_scale,
phase: HGPhaseFunction::new(g),
sigma_a_grid,
sigma_s_grid,
sigma_scale,
majorant_grid,
}
}
}
pub trait GridMediumCreator {
pub fn new(
bounds: &Bounds3f,
render_from_medium: &Transform,
sigma_a: &Spectrum,
sigma_s: &Spectrum,
sigma_scale: Float,
g: Float,
density_grid: SampledGrid<Float>,
temperature_grid: SampledGrid<Float>,
le: &Spectrum,
le_scale: SampledGrid<Float>,
) -> Self;
}
impl GridMediumCreator for GridMedium {
#[allow(clippy::too_many_arguments)]
fn new(
bounds: &Bounds3f,
render_from_medium: &Transform,
sigma_a: &Spectrum,
sigma_s: &Spectrum,
sigma_scale: Float,
g: Float,
density_grid: SampledGrid<Float>,
temperature_grid: SampledGrid<Float>,
le: &Spectrum,
le_scale: SampledGrid<Float>,
) -> Self {
let mut sigma_a_spec = DenselySampledSpectrum::from_spectrum(sigma_a);
let mut sigma_s_spec = DenselySampledSpectrum::from_spectrum(sigma_s);
let le_spec = DenselySampledSpectrum::from_spectrum(le);
sigma_a_spec.scale(sigma_scale);
sigma_s_spec.scale(sigma_scale);
let mut majorant_grid = MajorantGrid::new(*bounds, Point3i::new(16, 16, 16));
let is_emissive = if temperature_grid.is_some() {
true
} else {
le_spec.max_value() > 0.
};
for z in 0..majorant_grid.res.z() {
for y in 0..majorant_grid.res.y() {
for x in 0..majorant_grid.res.x() {
let bounds = majorant_grid.voxel_bounds(x, y, z);
majorant_grid.set(x, y, z, density_grid.max_value(bounds));
}
}
}
Self {
bounds: *bounds,
render_from_medium: *render_from_medium,
sigma_a_spec,
sigma_s_spec,
density_grid,
phase: HGPhaseFunction::new(g),
temperature_grid,
le_spec,
le_scale,
is_emissive,
majorant_grid,
}
}
}
pub trait HomogeneousMediumCreator {
fn new(
sigma_a: Spectrum,
sigma_s: Spectrum,
sigma_scale: Float,
le: Spectrum,
le_scale: Float,
g: Float,
) -> Self;
}
impl HomogeneousMediumCreator for HomogeneousMedium {
fn new(
sigma_a: Spectrum,
sigma_s: Spectrum,
sigma_scale: Float,
le: Spectrum,
le_scale: Float,
g: Float,
) -> Self {
let mut sigma_a_spec = DenselySampledSpectrumBuffer::from_spectrum(&sigma_a);
let mut sigma_s_spec = DenselySampledSpectrumBuffer::from_spectrum(&sigma_s);
let mut le_spec = DenselySampledSpectrum::from_spectrum(&le);
sigma_a_spec.scale(sigma_scale);
sigma_s_spec.scale(sigma_scale);
le_spec.scale(le_scale);
Self {
sigma_a_spec,
sigma_s_spec,
le_spec,
phase: HGPhaseFunction::new(g),
}
}
}

1
src/filters/boxf.rs Normal file
View file

@ -0,0 +1 @@
pub trait BoxFilterCreator {}

31
src/filters/gaussian.rs Normal file
View file

@ -0,0 +1,31 @@
use crate::core::filter::CreateFilterSampler;
use shared::Float;
use shared::core::filter::FilterSampler;
use shared::core::geometry::Vector2f;
use shared::filters::GaussianFilter;
use shared::utils::math::gaussian;
pub trait GaussianFilterCreator {
fn new(radius: Vector2f, sigma: Float) -> Self;
}
impl GaussianFilterCreator for GaussianFilter {
fn new(radius: Vector2f, sigma: Float) -> Self {
let exp_x = gaussian(radius.x(), 0., sigma);
let exp_y = gaussian(radius.y(), 0., sigma);
let sampler = FilterSampler::new(radius, move |p: Point2f| {
let gx = (gaussian(p.x(), 0., sigma) - exp_x).max(0.0);
let gy = (gaussian(p.y(), 0., sigma) - exp_y).max(0.0);
gx * gy
});
Self {
radius,
sigma,
exp_x: gaussian(radius.x(), 0., sigma),
exp_y: gaussian(radius.y(), 0., sigma),
sampler,
}
}
}

22
src/filters/lanczos.rs Normal file
View file

@ -0,0 +1,22 @@
use shared::Float;
use shared::core::geometry::{Point2f, Vector2f};
use shared::filters::LanczosSincFilter;
use shared::utils::math::windowed_sinc;
pub trait LanczosFilterCreator {
fn new(radius: Vector2f, tau: Float) -> Self;
}
impl LanczosFilterCreator for LanczosSincFilter {
fn new(radius: Vector2f, tau: Float) -> Self {
let sampler = FilterSampler::new(radius, move |p: Point2f| {
windowed_sinc(p.x(), radius.x(), tau) * windowed_sinc(p.y(), radius.y(), tau)
});
Self {
radius,
tau,
sampler,
}
}
}

24
src/filters/mitchell.rs Normal file
View file

@ -0,0 +1,24 @@
use shared::Float;
use shared::core::geometry::{Point2f, Vector2f};
use shared::filters::MitchellFilter;
pub trait MitchellFilterCreator {
fn new(radius: Vector2f, b: Float, c: Float) -> Self;
}
impl MitchellFilterCreator for MitchellFilter {
fn new(radius: Vector2f, b: Float, c: Float) -> Self {
let sampler = FilterSampler::new(radius, move |p: Point2f| {
let nx = 2.0 * p.x() / radius.x();
let ny = 2.0 * p.y() / radius.y();
Self::mitchell_1d_eval(b, c, nx) * Self::mitchell_1d_eval(b, c, ny)
});
Self {
radius,
b,
c,
sampler,
}
}
}

5
src/filters/mod.rs Normal file
View file

@ -0,0 +1,5 @@
pub mod boxf;
pub mod gaussian;
pub mod lanczos;
pub mod mitchell;
pub mod triangle;

0
src/filters/triangle.rs Normal file
View file

320
src/gpu/driver.rs Normal file
View file

@ -0,0 +1,320 @@
use bytemuck::{Pod, Zeroable};
use cust::context::{CacheConfig, CurrentContext, ResourceLimit};
use cust::device::DeviceAttribute;
use cust::memory::DeviceCopy;
use cust::prelude::*;
use lazy_static::lazy_static;
use parking_lot::Mutex;
use std::error::Error;
use std::sync::Arc;
use shared::Float;
use shared::core::geometry::{Normal, Point, Vector};
use shared::core::options::get_options;
use shared::spectra::{SampledSpectrum, SampledWavelengths};
use shared::utils::interval::Interval;
#[macro_export]
macro_rules! impl_gpu_traits {
($name:ty) => {
unsafe impl DeviceCopy for $name {}
unsafe impl Zeroable for $name {}
unsafe impl Pod for $name {}
};
}
#[macro_export]
macro_rules! impl_math_gpu_traits {
($Struct:ident) => {
#[cfg(feature = "use_gpu")]
unsafe impl<T, const N: usize> DeviceCopy for $Struct<T, N> where T: DeviceCopy + Copy {}
unsafe impl<T, const N: usize> Zeroable for $Struct<T, N> where T: Zeroable {}
unsafe impl<T, const N: usize> Pod for $Struct<T, N> where T: Pod {}
};
}
impl_math_gpu_traits!(Vector);
impl_math_gpu_traits!(Normal);
impl_math_gpu_traits!(Point);
impl_gpu_traits!(Interval);
impl_gpu_traits!(Float4);
impl_gpu_traits!(SampledSpectrum);
impl_gpu_traits!(SampledWavelengths);
#[repr(C, align(16))]
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct Float4 {
pub v: [f32; 4],
}
pub type Vec4 = Vector<Float, 4>;
impl From<Vec4> for Float4 {
#[inline]
fn from(vec: Vector<f32, 4>) -> Self {
Self { v: vec.0 }
}
}
impl From<Float4> for Vec4 {
#[inline]
fn from(storage: Float4) -> Self {
Vector(storage.v)
}
}
struct KernelStats {
description: String,
num_launches: usize,
sum_ms: f32,
min_ms: f32,
max_ms: f32,
}
impl KernelStats {
fn new(description: &str) -> Self {
Self {
description: description.to_string(),
num_launches: 0,
sum_ms: 0.0,
min_ms: 0.0,
max_ms: 0.0,
}
}
}
struct ProfilerEvent {
start: Event,
stop: Event,
active: bool,
stats: Option<Arc<Mutex<KernelStats>>>,
}
impl ProfilerEvent {
fn new() -> Result<Self, cust::error::CudaError> {
let start = Event::new(EventFlags::DEFAULT)?;
let stop = Event::new(EventFlags::DEFAULT)?;
Ok(Self {
start,
stop,
active: false,
stats: None,
})
}
fn sync(&mut self) {
if !self.active {
return;
}
if self.stop.synchronize().is_ok() {
match self.stop.elapsed_time_f32(&self.start) {
Ok(ms) => {
if let Some(stats_arc) = &self.stats {
let mut stats = stats_arc.lock();
stats.num_launches += 1;
if stats.num_launches == 1 {
stats.sum_ms = ms;
stats.min_ms = ms;
stats.max_ms = ms;
} else {
stats.sum_ms += ms;
stats.min_ms = stats.min_ms.min(ms);
stats.max_ms = stats.max_ms.max(ms);
}
}
}
Err(e) => log::error!("Failed to get elapsed time: {:?}", e),
}
}
self.active = false;
}
}
struct Profiler {
kernel_stats: Vec<Arc<Mutex<KernelStats>>>,
event_pool: Vec<ProfilerEvent>,
pool_offset: usize,
}
impl Profiler {
fn new() -> Self {
Self {
kernel_stats: Vec::new(),
event_pool: Vec::new(),
pool_offset: 0,
}
}
fn prepare<'a>(&'a mut self, description: &str) -> &'a mut ProfilerEvent {
if self.event_pool.is_empty() {
for _ in 0..128 {
if let Ok(e) = ProfilerEvent::new() {
self.event_pool.push(e);
}
}
}
if self.pool_offset >= self.event_pool.len() {
self.pool_offset = 0;
}
let idx = self.pool_offset;
self.pool_offset += 1;
let pe = &mut self.event_pool[idx];
if pe.active {
pe.sync();
}
pe.active = true;
pe.stats = None;
let mut found = None;
for s in &self.kernel_stats {
if s.lock().description == description {
found = Some(s.clone());
break;
}
}
if found.is_none() {
let new_stats = Arc::new(Mutex::new(KernelStats::new(description)));
self.kernel_stats.push(new_stats.clone());
found = Some(new_stats);
}
pe.stats = found;
pe
}
}
pub struct GpuState {
pub context: Context,
pub stream: Stream,
profiler: Profiler,
}
impl GpuState {
fn init(device_index: u32) -> Result<Self, Box<dyn Error>> {
match cust::init(CudaFlags::empty()) {
Ok(_) => {}
Err(cust::error::CudaError::SharedObjectInitFailed) => {}
Err(e) => return Err(Box::new(e)),
}
let device = Device::get_device(device_index)?;
let name = device.name().unwrap_or_else(|_| "Unknown".into());
log::info!("Selected GPU: {}", name);
let has_unified = device
.get_attribute(DeviceAttribute::UnifiedAddressing)
.unwrap_or(0);
if has_unified == 0 {
panic!("Selected GPU does not support unified addressing.");
}
let context = Context::new(device)?;
CurrentContext::set_resource_limit(ResourceLimit::StackSize, 8192)?;
CurrentContext::set_resource_limit(ResourceLimit::PrintfFifoSize, 32 * 1024 * 1024)?;
CurrentContext::set_cache_config(CacheConfig::PreferL1)?;
let stream = Stream::new(StreamFlags::DEFAULT, None)?;
Ok(Self {
context,
stream,
profiler: Profiler::new(),
})
}
}
lazy_static! {
pub static ref GPU_STATE: Mutex<Option<GpuState>> = Mutex::new(None);
}
pub fn gpu_init() {
if !get_options().use_gpu {
return;
}
let device_id = get_options().gpu_device.unwrap_or(0);
log::info!("Initializing GPU Device {}", device_id);
match GpuState::init(device_id) {
Ok(state) => {
#[cfg(feature = "use_nvtx")]
nvtx::name_thread("MAIN_THREAD");
*GPU_STATE.lock() = Some(state);
}
Err(e) => {
panic!("Failed to initialize GPU: {:?}", e);
}
}
}
pub fn gpu_thread_init() {
if let Some(state) = GPU_STATE.lock().as_ref() {
if let Err(e) = CurrentContext::set_current(&state.context) {
log::error!("Failed to set CUDA context for thread: {:?}", e);
}
}
}
const SANITY_PTX: &str = r#"
.version 6.5
.target sm_30
.address_size 64
.visible .entry scale_array_kernel(.param .u64 ptr, .param .u32 n, .param .f32 scale) {
.reg .u64 %ptr; .reg .u32 %n; .reg .f32 %scale;
.reg .u32 %tid; .reg .u32 %ntid; .reg .u32 %ctid; .reg .u32 %idx;
.reg .pred %p; .reg .u64 %addr; .reg .f32 %val;
ld.param.u64 %ptr, [ptr];
ld.param.u32 %n, [n];
ld.param.f32 %scale, [scale];
mov.u32 %tid, %tid.x;
mov.u32 %ntid, %ntid.x;
mov.u32 %ctid, %ctaid.x;
mad.lo.s32 %idx, %ctid, %ntid, %tid;
setp.ge.u32 %p, %idx, %n;
@%p ret;
mul.wide.u32 %addr, %idx, 4;
add.u64 %addr, %ptr, %addr;
ld.global.f32 %val, [%addr];
mul.f32 %val, %val, %scale;
st.global.f32 [%addr], %val;
}
"#;
pub fn launch_scale_kernel(ptr: *mut f32, len: usize, scale: f32) -> Result<(), Box<dyn Error>> {
// Note: quick_init works best for isolated tests.
// If the main engine runs, this might conflict, but for the sanity test it is correct.
let _ctx = cust::quick_init()?;
let module = Module::from_str(SANITY_PTX)?;
let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
let mut kernel = module.get_function("scale_array_kernel")?;
let block_size = 256;
let grid_size = (len as u32 + block_size - 1) / block_size;
unsafe {
launch!(
kernel<<<grid_size, block_size, 0, stream>>>(
ptr as u64,
len as u32,
scale
)
)?;
}
stream.synchronize()?;
Ok(())
}

534
src/gpu/memory.rs Normal file
View file

@ -0,0 +1,534 @@
#![allow(clippy::too_many_arguments)]
use super::Float4;
use crate::Float;
use crate::core::geometry::{Normal3f, Point2f, Point2i, Point3f, Point3fi, Ray, Vector3f};
use crate::lights::LightSampleContext;
use crate::spectra::{SampledSpectrum, SampledWavelengths};
// use cust::memory::{CopyDestination, DeviceMemory};
// use cust::prelude::*;
#[macro_export]
macro_rules! soa_struct {
(
$(#[$outer:meta])*
pub struct $name:ident {
$(
pub $field:ident : $type:ty
),* $(,)?
}
) => {
#[cfg(feature = "use_gpu")]
$(#[$outer])*
pub struct $name {
capacity: u32,
pub count: cust::memory::DeviceBuffer<u32>,
$(
pub $field: cust::memory::DeviceBuffer<$type>,
)*
}
#[cfg(feature = "use_gpu")]
impl $name {
pub fn new(capacity: usize) -> cust::error::CudaResult<Self> {
use cust::memory::DeviceBuffer;
Ok(Self {
capacity: capacity as u32,
count: DeviceBuffer::zeroed(1)?,
$(
$field: DeviceBuffer::zeroed(capacity)?,
)*
})
}
pub fn len(&self) -> cust::error::CudaResult<u32> {
let mut host_count = [0u32; 1];
self.count.copy_to(&mut host_count)?;
Ok(host_count[0])
}
pub fn reset(&mut self) -> cust::error::CudaResult<()> {
self.count.copy_from(&[0])
}
// Generate the View name
pub fn as_view(&mut self) -> paste::paste! { [<$name View>] } {
paste::paste! {
[<$name View>] {
capacity: self.capacity,
count: self.count.as_device_ptr().as_mut_ptr(),
$(
$field: self.$field.as_device_ptr().as_raw() as *mut $type,
)*
}
}
}
}
paste::paste! {
#[repr(C)]
#[derive(Clone, Copy)]
pub struct [<$name View>] {
pub capacity: u32,
pub count: *mut u32,
$(
pub $field: *mut $type,
)*
}
unsafe impl cust::memory::DeviceCopy for [<$name View>] {}
impl [<$name View>] {
// The raw push that fills every field
#[cfg(feature = "use_gpu")]
pub unsafe fn push(&self, $( $field : $type ),* ) -> Option<u32> {
use core::sync::atomic::{AtomicU32, Ordering};
let index = unsafe {
let counter_ptr = self.count as *mut AtomicU32;
(*counter_ptr).fetch_add(1, Ordering::Relaxed)
};
if index >= self.capacity {
return None;
}
unsafe {
$(
*self.$field.add(index as usize) = $field;
)*
}
Some(index)
}
#[cfg(feature = "use_gpu")]
pub unsafe fn size(&self) -> u32 {
use core::sync::atomic::{AtomicU32, Ordering};
unsafe {
(*(self.count as *const AtomicU32)).load(Ordering::Relaxed)
}
}
$(
#[cfg(feature = "use_gpu")]
pub fn [<$field _ptr>](&self) -> *mut $type {
self.$field
}
)*
}
}
};
}
#[repr(C)]
#[derive(Clone, Copy, Default)]
pub struct RaySamplesDirect {
pub u: Point2f,
pub uc: Float,
}
#[repr(C)]
#[derive(Clone, Copy, Default)]
pub struct RaySamplesIndirect {
pub uc: Float,
pub rr: Float,
pub u: Point2f,
}
#[repr(C)]
#[derive(Clone, Copy, Default)]
pub struct RaySamplesSubsurface {
pub uc: Float,
pub u: Point2f,
}
#[repr(C)]
#[derive(Clone, Copy, Default)]
pub struct RaySamples {
pub direct: RaySamplesDirect,
pub indirect: RaySamplesIndirect,
pub have_subsurface: bool,
pub subsurface: RaySamplesSubsurface,
}
soa_struct! {
pub struct RayQueue {
pub ray_o: Point3f,
pub ray_d: Vector3f,
pub depth: i32,
pub lambda: SampledWavelengths,
pub pixel_index: u32,
pub beta: SampledSpectrum,
pub r_u: SampledSpectrum,
pub r_l: SampledSpectrum,
pub ctx_pi: Point3f,
pub ctx_n: Normal3f,
pub ctx_ns: Normal3f,
pub eta_scale: Float,
pub specular_bounce: u32,
pub any_non_specular_bounces: u32,
}
}
soa_struct! {
pub struct PixelSampleStateStorage {
pub p_pixel: Point2i,
pub l: SampledSpectrum,
pub lambda: SampledWavelengths,
pub filter_weight: Float,
pub visible_surface: u32,
pub camera_ray_weight: SampledSpectrum,
pub rs_direct_packed: Float4,
pub rs_indirect_packed: Float4,
pub rs_subsurface_packed: Float4,
}
}
soa_struct! {
pub struct EscapedRayQueue {
pub ray_o: Point3f,
pub ray_d: Vector3f,
pub depth: i32,
pub lambda: SampledWavelengths,
pub pixel_index: u32,
pub beta: SampledSpectrum,
pub specular_bounce: u32,
pub r_u: SampledSpectrum,
pub r_l: SampledSpectrum,
pub ctx_pi: Point3f,
pub ctx_n: Normal3f,
pub ctx_ns: Normal3f,
}
}
soa_struct! {
pub struct HitAreaLightQueue {
pub area_light_id: u32, // Light ID
pub p: Point3f,
pub n: Normal3f,
pub uv: Point2f,
pub wo: Vector3f,
pub lambda: SampledWavelengths,
pub depth: i32,
pub beta: SampledSpectrum,
pub r_u: SampledSpectrum,
pub r_l: SampledSpectrum,
pub ctx_pi: Point3f,
pub ctx_n: Normal3f,
pub ctx_ns: Normal3f,
pub specular_bounce: u32,
pub pixel_index: u32,
}
}
soa_struct! {
pub struct ShadowRayQueue {
pub ray_o: Point3f,
pub ray_d: Vector3f,
pub t_max: Float,
pub lambda: SampledWavelengths,
pub ld: SampledSpectrum,
pub r_u: SampledSpectrum,
pub r_l: SampledSpectrum,
pub pixel_index: u32,
}
}
soa_struct! {
pub struct GetBSSRDFAndProbeRayQueue {
pub material_id: u32,
pub lambda: SampledWavelengths,
pub beta: SampledSpectrum,
pub r_u: SampledSpectrum,
pub p: Point3f,
pub wo: Vector3f,
pub n: Normal3f,
pub ns: Normal3f,
pub dpdus: Vector3f,
pub uv: Point2f,
pub depth: i32,
pub mi_inside: u32,
pub mi_outside: u32,
pub eta_scale: Float,
pub pixel_index: u32,
}
}
soa_struct! {
pub struct SubsurfaceScatterQueue {
pub p0: Point3f,
pub p1: Point3f,
pub depth: i32,
pub material_id: u32,
pub lambda: SampledWavelengths,
pub beta: SampledSpectrum,
pub r_u: SampledSpectrum,
pub mi_inside: u32,
pub mi_outside: u32,
pub eta_scale: Float,
pub pixel_index: u32,
}
}
soa_struct! {
pub struct MediumSampleQueue {
pub ray_o: Point3f,
pub ray_d: Vector3f,
pub t_max: Float,
pub lambda: SampledWavelengths,
pub beta: SampledSpectrum,
pub r_u: SampledSpectrum,
pub r_l: SampledSpectrum,
pub pixel_index: u32,
pub ctx_pi: Point3f,
pub ctx_n: Normal3f,
pub ctx_ns: Normal3f,
pub specular_bounce: u32,
pub any_non_specular_bounces: u32,
pub eta_scale: Float,
pub area_light_id: u32,
pub pi: Point3fi,
pub n: Normal3f,
pub dpdu: Vector3f,
pub dpdv: Vector3f,
pub wo: Vector3f,
pub uv: Point2f,
pub material_id: u32,
pub ns: Normal3f,
pub dpdus: Vector3f,
pub dpdvs: Vector3f,
pub dndus: Normal3f,
pub dndvs: Normal3f,
pub face_index: i32,
pub mi_inside: u32,
pub mi_outside: u32,
}
}
soa_struct! {
pub struct MaterialEvalQueue {
pub material_id: u32,
pub pi: Point3fi,
pub n: Normal3f,
pub dpdu: Vector3f,
pub dpdv: Vector3f,
pub time: Float,
pub depth: i32,
pub ns: Normal3f,
pub dpdus: Vector3f,
pub dpdvs: Vector3f,
pub dndus: Normal3f,
pub dndvs: Normal3f,
pub uv: Point2f,
pub face_index: i32,
pub lambda: SampledWavelengths,
pub pixel_index: u32,
pub any_non_specular_bounces: u32,
pub wo: Vector3f,
pub beta: SampledSpectrum,
pub r_u: SampledSpectrum,
pub eta_scale: Float,
pub mi_inside: u32,
pub mi_outside: u32,
}
}
soa_struct! {
pub struct MediumScatterQueue {
pub p: Point3f,
pub depth: usize,
pub lambda: SampledWavelengths,
pub beta: SampledSpectrum,
pub r_u: SampledSpectrum,
pub wo: Vector3f,
pub time: Float,
pub eta_scale: Float,
pub pixel_index: usize,
// ID
pub phase_function: u32,
pub medium: u32,
}
}
#[repr(C)]
#[derive(Clone, Copy)]
pub struct RayWorkItem {
pub ray: Ray,
pub depth: i32,
pub lambda: SampledWavelengths,
pub pixel_index: u32,
pub beta: SampledSpectrum,
pub r_u: SampledSpectrum,
pub r_l: SampledSpectrum,
pub prev_intr_ctx: LightSampleContext,
pub eta_scale: Float,
pub specular_bounce: bool,
pub any_non_specular_bounces: bool,
}
#[repr(C)]
#[derive(Clone, Copy)]
pub struct EscapedRayWorkItem {
pub ray_o: Point3f,
pub ray_d: Vector3f,
pub depth: i32,
pub lambda: SampledWavelengths,
pub pixel_index: u32,
pub beta: SampledSpectrum,
pub specular_bounce: bool,
pub r_u: SampledSpectrum,
pub r_l: SampledSpectrum,
pub prev_intr_ctx: LightSampleContext,
}
#[repr(C)]
#[derive(Clone, Copy)]
pub struct ShadowRayWorkItem {
pub ray: Ray,
pub t_max: Float,
pub lambda: SampledWavelengths,
pub ld: SampledSpectrum,
pub r_u: SampledSpectrum,
pub r_l: SampledSpectrum,
pub pixel_index: u32,
}
impl RayQueueView {
#[cfg(feature = "use_gpu")]
pub unsafe fn push_work_item(&self, item: RayWorkItem) -> Option<u32> {
unsafe {
self.push(
item.ray.o,
item.ray.d,
item.depth,
item.lambda,
item.pixel_index,
item.beta,
item.r_u,
item.r_l,
item.prev_intr_ctx.pi.into(),
item.prev_intr_ctx.n,
item.prev_intr_ctx.ns,
item.eta_scale,
if item.specular_bounce { 1 } else { 0 },
if item.any_non_specular_bounces { 1 } else { 0 },
)
}
}
}
impl EscapedRayQueueView {
#[cfg(feature = "use_gpu")]
pub unsafe fn push_work_item(&self, r: &RayWorkItem) -> Option<u32> {
unsafe {
self.push(
r.ray.o,
r.ray.d,
r.depth,
r.lambda,
r.pixel_index,
r.beta,
if r.specular_bounce { 1 } else { 0 },
r.r_u,
r.r_l,
r.prev_intr_ctx.pi.into(),
r.prev_intr_ctx.n,
r.prev_intr_ctx.ns,
)
}
}
}
impl PixelSampleStateStorageView {
#[cfg(feature = "use_gpu")]
pub unsafe fn get_samples(&self, index: u32) -> RaySamples {
let i = index as usize;
let (dir, ind, ss) = unsafe {
(
*self.rs_direct_packed.add(i),
*self.rs_indirect_packed.add(i),
*self.rs_subsurface_packed.add(i),
)
};
let direct_u = Point2f::new(dir.v[0], dir.v[1]);
let direct_uc = dir.v[2];
let flags = dir.v[3] as i32;
let have_subsurface = (flags & 1) != 0;
let indirect_uc = ind.v[0];
let indirect_rr = ind.v[1];
let indirect_u = Point2f::new(ind.v[2], ind.v[3]);
let subsurface_uc = ss.v[0];
let subsurface_u = Point2f::new(ss.v[1], ss.v[2]);
RaySamples {
direct: RaySamplesDirect {
u: direct_u,
uc: direct_uc,
},
indirect: RaySamplesIndirect {
uc: indirect_uc,
rr: indirect_rr,
u: indirect_u,
},
have_subsurface,
subsurface: RaySamplesSubsurface {
uc: subsurface_uc,
u: subsurface_u,
},
}
}
#[cfg(feature = "use_gpu")]
pub unsafe fn set_samples(&self, index: u32, rs: RaySamples) {
if index >= self.capacity {
return;
}
let i = index as usize;
let flags = if rs.have_subsurface { 1.0 } else { 0.0 };
let dir = Float4 {
v: [rs.direct.u.0[0], rs.direct.u.0[1], rs.direct.uc, flags],
};
let ind = Float4 {
v: [
rs.indirect.uc,
rs.indirect.rr,
rs.indirect.u.0[0],
rs.indirect.u.0[1],
],
};
unsafe {
*self.rs_direct_packed.add(i) = dir;
*self.rs_indirect_packed.add(i) = ind;
}
if rs.have_subsurface {
let ss = Float4 {
v: [
rs.subsurface.uc,
rs.subsurface.u.0[0],
rs.subsurface.u.0[1],
0.0,
],
};
unsafe {
*self.rs_subsurface_packed.add(i) = ss;
}
}
}
}

5
src/gpu/mod.rs Normal file
View file

@ -0,0 +1,5 @@
pub mod driver;
pub mod memory;
pub mod wavefront;
pub use driver::launch_scale_kernel;

45
src/gpu/wavefront/mod.rs Normal file
View file

@ -0,0 +1,45 @@
// use crate::core::scene::BasicScene;
use crate::{
EscapedRayQueue, GetBSSRDFAndProbeRayQueue, HitAreaLightQueue, MaterialEvalQueue,
MediumSampleQueue, MediumScatterQueue, PixelSampleStateStorage, RayQueue, ShadowRayQueue,
SubsurfaceScatterQueue,
};
use shared::core::camera::Camera;
use shared::core::film::Film;
use shared::core::filter::Filter;
use shared::core::light::Light;
use shared::core::sampler::Sampler;
use shared::lights::LightSampler;
use std::sync::Arc;
pub struct WavefrontPathIntegrator {
pub film: Film,
pub filter: Filter,
pub sampler: Sampler,
pub camera: Arc<Camera>,
pub light_sampler: LightSampler,
pub infinite_lights: Option<Vec<Arc<Light>>>,
pub max_depth: i32,
pub samples_per_pixel: i32,
pub regularize: bool,
pub scanlines_per_pixel: i32,
pub max_queue_size: i32,
pub pixel_sample_state: PixelSampleStateStorage,
pub ray_queue: [RayQueue; 2],
pub hit_area_light_queue: HitAreaLightQueue,
pub shadow_ray_queue: ShadowRayQueue,
pub escaped_ray_queue: Option<EscapedRayQueue>,
pub basic_material_queue: Option<MaterialEvalQueue>,
pub universal_material_queue: Option<MaterialEvalQueue>,
pub medium_sample_queue: Option<MediumSampleQueue>,
pub medium_scatter_queue: Option<MediumScatterQueue>,
pub bssrf_queue: Option<GetBSSRDFAndProbeRayQueue>,
pub subsurface_queue: Option<SubsurfaceScatterQueue>,
}
#[cfg(feature = "use_gpu")]
impl WavefrontPathIntegrator {
pub fn new(scene: BasicScene) -> Self {
todo!()
}
}