Working on fixing import errors, standardizing CPU/GPU container types
This commit is contained in:
parent
c412b6d668
commit
3b3f9eb155
11 changed files with 1171 additions and 0 deletions
184
src/core/medium.rs
Normal file
184
src/core/medium.rs
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
use crate::spectra::dense::DenselySampledSpectrumBuffer;
|
||||
use shared::core::geometry::Bounds3f;
|
||||
use shared::core::medium::{GridMedium, HomogeneousMedium, RGBGridMedium};
|
||||
use shared::spectra::{RGBIlluminantSpectrum, RGBUnboundedSpectrum};
|
||||
use shared::utils::Transform;
|
||||
use shared::utils::containers::SampledGrid;
|
||||
use shared::{Float, core::medium::MajorantGrid};
|
||||
|
||||
pub struct MajorantGridHost {
|
||||
pub device: MajorantGrid,
|
||||
voxels: Vec<Float>,
|
||||
}
|
||||
|
||||
impl MajorantGridHost {
|
||||
pub fn new(bounds: Bounds3f, res: Point3i) -> Self {
|
||||
let n = (res.x() * res.y() * res.z()) as usize;
|
||||
let voxels = vec![0.0; n];
|
||||
|
||||
let device = MajorantGrid {
|
||||
bounds,
|
||||
res,
|
||||
voxels: std::ptr::null_mut(),
|
||||
n_voxels: n as u32,
|
||||
};
|
||||
|
||||
Self { device, voxels }
|
||||
}
|
||||
}
|
||||
|
||||
pub trait RGBGridMediumCreator {
|
||||
fn new(
|
||||
bounds: &Bounds3f,
|
||||
render_from_medium: &Transform,
|
||||
g: Float,
|
||||
sigma_a_grid: SampledGrid<RGBUnboundedSpectrum>,
|
||||
sigma_s_grid: SampledGrid<RGBUnboundedSpectrum>,
|
||||
sigma_scale: Float,
|
||||
le_grid: SampledGrid<RGBIlluminantSpectrum>,
|
||||
le_scale: Float,
|
||||
) -> Self;
|
||||
}
|
||||
|
||||
impl RGBGridMediumCreator for RGBGridMedium {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn new(
|
||||
bounds: &Bounds3f,
|
||||
render_from_medium: &Transform,
|
||||
g: Float,
|
||||
sigma_a_grid: SampledGrid<RGBUnboundedSpectrum>,
|
||||
sigma_s_grid: SampledGrid<RGBUnboundedSpectrum>,
|
||||
sigma_scale: Float,
|
||||
le_grid: SampledGrid<RGBIlluminantSpectrum>,
|
||||
le_scale: Float,
|
||||
) -> Self {
|
||||
let majorant_grid = MajorantGrid::new(*bounds, Point3i::new(16, 16, 16));
|
||||
for z in 0..majorant_grid.res.x() {
|
||||
for y in 0..majorant_grid.res.y() {
|
||||
for x in 0..majorant_grid.res.x() {
|
||||
let bounds = majorant_grid.voxel_bounds(x, y, z);
|
||||
let convert = |s: &RGBUnboundedSpectrum| s.max_value();
|
||||
let max_sigma_t = sigma_a_grid.max_value_convert(bounds, convert)
|
||||
+ sigma_s_grid.max_value_convert(bounds, convert);
|
||||
majorant_grid.set(x, y, z, sigma_scale * max_sigma_t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Self {
|
||||
bounds: *bounds,
|
||||
render_from_medium: *render_from_medium,
|
||||
le_grid,
|
||||
le_scale,
|
||||
phase: HGPhaseFunction::new(g),
|
||||
sigma_a_grid,
|
||||
sigma_s_grid,
|
||||
sigma_scale,
|
||||
majorant_grid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait GridMediumCreator {
|
||||
pub fn new(
|
||||
bounds: &Bounds3f,
|
||||
render_from_medium: &Transform,
|
||||
sigma_a: &Spectrum,
|
||||
sigma_s: &Spectrum,
|
||||
sigma_scale: Float,
|
||||
g: Float,
|
||||
density_grid: SampledGrid<Float>,
|
||||
temperature_grid: SampledGrid<Float>,
|
||||
le: &Spectrum,
|
||||
le_scale: SampledGrid<Float>,
|
||||
) -> Self;
|
||||
}
|
||||
|
||||
impl GridMediumCreator for GridMedium {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn new(
|
||||
bounds: &Bounds3f,
|
||||
render_from_medium: &Transform,
|
||||
sigma_a: &Spectrum,
|
||||
sigma_s: &Spectrum,
|
||||
sigma_scale: Float,
|
||||
g: Float,
|
||||
density_grid: SampledGrid<Float>,
|
||||
temperature_grid: SampledGrid<Float>,
|
||||
le: &Spectrum,
|
||||
le_scale: SampledGrid<Float>,
|
||||
) -> Self {
|
||||
let mut sigma_a_spec = DenselySampledSpectrum::from_spectrum(sigma_a);
|
||||
let mut sigma_s_spec = DenselySampledSpectrum::from_spectrum(sigma_s);
|
||||
let le_spec = DenselySampledSpectrum::from_spectrum(le);
|
||||
sigma_a_spec.scale(sigma_scale);
|
||||
sigma_s_spec.scale(sigma_scale);
|
||||
|
||||
let mut majorant_grid = MajorantGrid::new(*bounds, Point3i::new(16, 16, 16));
|
||||
let is_emissive = if temperature_grid.is_some() {
|
||||
true
|
||||
} else {
|
||||
le_spec.max_value() > 0.
|
||||
};
|
||||
|
||||
for z in 0..majorant_grid.res.z() {
|
||||
for y in 0..majorant_grid.res.y() {
|
||||
for x in 0..majorant_grid.res.x() {
|
||||
let bounds = majorant_grid.voxel_bounds(x, y, z);
|
||||
majorant_grid.set(x, y, z, density_grid.max_value(bounds));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Self {
|
||||
bounds: *bounds,
|
||||
render_from_medium: *render_from_medium,
|
||||
sigma_a_spec,
|
||||
sigma_s_spec,
|
||||
density_grid,
|
||||
phase: HGPhaseFunction::new(g),
|
||||
temperature_grid,
|
||||
le_spec,
|
||||
le_scale,
|
||||
is_emissive,
|
||||
majorant_grid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait HomogeneousMediumCreator {
|
||||
fn new(
|
||||
sigma_a: Spectrum,
|
||||
sigma_s: Spectrum,
|
||||
sigma_scale: Float,
|
||||
le: Spectrum,
|
||||
le_scale: Float,
|
||||
g: Float,
|
||||
) -> Self;
|
||||
}
|
||||
|
||||
impl HomogeneousMediumCreator for HomogeneousMedium {
|
||||
fn new(
|
||||
sigma_a: Spectrum,
|
||||
sigma_s: Spectrum,
|
||||
sigma_scale: Float,
|
||||
le: Spectrum,
|
||||
le_scale: Float,
|
||||
g: Float,
|
||||
) -> Self {
|
||||
let mut sigma_a_spec = DenselySampledSpectrumBuffer::from_spectrum(&sigma_a);
|
||||
let mut sigma_s_spec = DenselySampledSpectrumBuffer::from_spectrum(&sigma_s);
|
||||
let mut le_spec = DenselySampledSpectrum::from_spectrum(&le);
|
||||
|
||||
sigma_a_spec.scale(sigma_scale);
|
||||
sigma_s_spec.scale(sigma_scale);
|
||||
le_spec.scale(le_scale);
|
||||
|
||||
Self {
|
||||
sigma_a_spec,
|
||||
sigma_s_spec,
|
||||
le_spec,
|
||||
phase: HGPhaseFunction::new(g),
|
||||
}
|
||||
}
|
||||
}
|
||||
1
src/filters/boxf.rs
Normal file
1
src/filters/boxf.rs
Normal file
|
|
@ -0,0 +1 @@
|
|||
pub trait BoxFilterCreator {}
|
||||
31
src/filters/gaussian.rs
Normal file
31
src/filters/gaussian.rs
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
use crate::core::filter::CreateFilterSampler;
|
||||
use shared::Float;
|
||||
use shared::core::filter::FilterSampler;
|
||||
use shared::core::geometry::Vector2f;
|
||||
use shared::filters::GaussianFilter;
|
||||
use shared::utils::math::gaussian;
|
||||
|
||||
pub trait GaussianFilterCreator {
|
||||
fn new(radius: Vector2f, sigma: Float) -> Self;
|
||||
}
|
||||
|
||||
impl GaussianFilterCreator for GaussianFilter {
|
||||
fn new(radius: Vector2f, sigma: Float) -> Self {
|
||||
let exp_x = gaussian(radius.x(), 0., sigma);
|
||||
let exp_y = gaussian(radius.y(), 0., sigma);
|
||||
|
||||
let sampler = FilterSampler::new(radius, move |p: Point2f| {
|
||||
let gx = (gaussian(p.x(), 0., sigma) - exp_x).max(0.0);
|
||||
let gy = (gaussian(p.y(), 0., sigma) - exp_y).max(0.0);
|
||||
gx * gy
|
||||
});
|
||||
|
||||
Self {
|
||||
radius,
|
||||
sigma,
|
||||
exp_x: gaussian(radius.x(), 0., sigma),
|
||||
exp_y: gaussian(radius.y(), 0., sigma),
|
||||
sampler,
|
||||
}
|
||||
}
|
||||
}
|
||||
22
src/filters/lanczos.rs
Normal file
22
src/filters/lanczos.rs
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
use shared::Float;
|
||||
use shared::core::geometry::{Point2f, Vector2f};
|
||||
use shared::filters::LanczosSincFilter;
|
||||
use shared::utils::math::windowed_sinc;
|
||||
|
||||
pub trait LanczosFilterCreator {
|
||||
fn new(radius: Vector2f, tau: Float) -> Self;
|
||||
}
|
||||
|
||||
impl LanczosFilterCreator for LanczosSincFilter {
|
||||
fn new(radius: Vector2f, tau: Float) -> Self {
|
||||
let sampler = FilterSampler::new(radius, move |p: Point2f| {
|
||||
windowed_sinc(p.x(), radius.x(), tau) * windowed_sinc(p.y(), radius.y(), tau)
|
||||
});
|
||||
|
||||
Self {
|
||||
radius,
|
||||
tau,
|
||||
sampler,
|
||||
}
|
||||
}
|
||||
}
|
||||
24
src/filters/mitchell.rs
Normal file
24
src/filters/mitchell.rs
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
use shared::Float;
|
||||
use shared::core::geometry::{Point2f, Vector2f};
|
||||
use shared::filters::MitchellFilter;
|
||||
|
||||
pub trait MitchellFilterCreator {
|
||||
fn new(radius: Vector2f, b: Float, c: Float) -> Self;
|
||||
}
|
||||
|
||||
impl MitchellFilterCreator for MitchellFilter {
|
||||
fn new(radius: Vector2f, b: Float, c: Float) -> Self {
|
||||
let sampler = FilterSampler::new(radius, move |p: Point2f| {
|
||||
let nx = 2.0 * p.x() / radius.x();
|
||||
let ny = 2.0 * p.y() / radius.y();
|
||||
Self::mitchell_1d_eval(b, c, nx) * Self::mitchell_1d_eval(b, c, ny)
|
||||
});
|
||||
|
||||
Self {
|
||||
radius,
|
||||
b,
|
||||
c,
|
||||
sampler,
|
||||
}
|
||||
}
|
||||
}
|
||||
5
src/filters/mod.rs
Normal file
5
src/filters/mod.rs
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
pub mod boxf;
|
||||
pub mod gaussian;
|
||||
pub mod lanczos;
|
||||
pub mod mitchell;
|
||||
pub mod triangle;
|
||||
0
src/filters/triangle.rs
Normal file
0
src/filters/triangle.rs
Normal file
320
src/gpu/driver.rs
Normal file
320
src/gpu/driver.rs
Normal file
|
|
@ -0,0 +1,320 @@
|
|||
use bytemuck::{Pod, Zeroable};
|
||||
use cust::context::{CacheConfig, CurrentContext, ResourceLimit};
|
||||
use cust::device::DeviceAttribute;
|
||||
use cust::memory::DeviceCopy;
|
||||
use cust::prelude::*;
|
||||
use lazy_static::lazy_static;
|
||||
use parking_lot::Mutex;
|
||||
use std::error::Error;
|
||||
use std::sync::Arc;
|
||||
|
||||
use shared::Float;
|
||||
use shared::core::geometry::{Normal, Point, Vector};
|
||||
use shared::core::options::get_options;
|
||||
use shared::spectra::{SampledSpectrum, SampledWavelengths};
|
||||
use shared::utils::interval::Interval;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! impl_gpu_traits {
|
||||
($name:ty) => {
|
||||
unsafe impl DeviceCopy for $name {}
|
||||
unsafe impl Zeroable for $name {}
|
||||
unsafe impl Pod for $name {}
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! impl_math_gpu_traits {
|
||||
($Struct:ident) => {
|
||||
#[cfg(feature = "use_gpu")]
|
||||
unsafe impl<T, const N: usize> DeviceCopy for $Struct<T, N> where T: DeviceCopy + Copy {}
|
||||
|
||||
unsafe impl<T, const N: usize> Zeroable for $Struct<T, N> where T: Zeroable {}
|
||||
|
||||
unsafe impl<T, const N: usize> Pod for $Struct<T, N> where T: Pod {}
|
||||
};
|
||||
}
|
||||
|
||||
impl_math_gpu_traits!(Vector);
|
||||
impl_math_gpu_traits!(Normal);
|
||||
impl_math_gpu_traits!(Point);
|
||||
impl_gpu_traits!(Interval);
|
||||
impl_gpu_traits!(Float4);
|
||||
impl_gpu_traits!(SampledSpectrum);
|
||||
impl_gpu_traits!(SampledWavelengths);
|
||||
|
||||
#[repr(C, align(16))]
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq)]
|
||||
pub struct Float4 {
|
||||
pub v: [f32; 4],
|
||||
}
|
||||
pub type Vec4 = Vector<Float, 4>;
|
||||
|
||||
impl From<Vec4> for Float4 {
|
||||
#[inline]
|
||||
fn from(vec: Vector<f32, 4>) -> Self {
|
||||
Self { v: vec.0 }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Float4> for Vec4 {
|
||||
#[inline]
|
||||
fn from(storage: Float4) -> Self {
|
||||
Vector(storage.v)
|
||||
}
|
||||
}
|
||||
|
||||
struct KernelStats {
|
||||
description: String,
|
||||
num_launches: usize,
|
||||
sum_ms: f32,
|
||||
min_ms: f32,
|
||||
max_ms: f32,
|
||||
}
|
||||
|
||||
impl KernelStats {
|
||||
fn new(description: &str) -> Self {
|
||||
Self {
|
||||
description: description.to_string(),
|
||||
num_launches: 0,
|
||||
sum_ms: 0.0,
|
||||
min_ms: 0.0,
|
||||
max_ms: 0.0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct ProfilerEvent {
|
||||
start: Event,
|
||||
stop: Event,
|
||||
active: bool,
|
||||
stats: Option<Arc<Mutex<KernelStats>>>,
|
||||
}
|
||||
|
||||
impl ProfilerEvent {
|
||||
fn new() -> Result<Self, cust::error::CudaError> {
|
||||
let start = Event::new(EventFlags::DEFAULT)?;
|
||||
let stop = Event::new(EventFlags::DEFAULT)?;
|
||||
Ok(Self {
|
||||
start,
|
||||
stop,
|
||||
active: false,
|
||||
stats: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn sync(&mut self) {
|
||||
if !self.active {
|
||||
return;
|
||||
}
|
||||
|
||||
if self.stop.synchronize().is_ok() {
|
||||
match self.stop.elapsed_time_f32(&self.start) {
|
||||
Ok(ms) => {
|
||||
if let Some(stats_arc) = &self.stats {
|
||||
let mut stats = stats_arc.lock();
|
||||
stats.num_launches += 1;
|
||||
if stats.num_launches == 1 {
|
||||
stats.sum_ms = ms;
|
||||
stats.min_ms = ms;
|
||||
stats.max_ms = ms;
|
||||
} else {
|
||||
stats.sum_ms += ms;
|
||||
stats.min_ms = stats.min_ms.min(ms);
|
||||
stats.max_ms = stats.max_ms.max(ms);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => log::error!("Failed to get elapsed time: {:?}", e),
|
||||
}
|
||||
}
|
||||
self.active = false;
|
||||
}
|
||||
}
|
||||
|
||||
struct Profiler {
|
||||
kernel_stats: Vec<Arc<Mutex<KernelStats>>>,
|
||||
event_pool: Vec<ProfilerEvent>,
|
||||
pool_offset: usize,
|
||||
}
|
||||
|
||||
impl Profiler {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
kernel_stats: Vec::new(),
|
||||
event_pool: Vec::new(),
|
||||
pool_offset: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn prepare<'a>(&'a mut self, description: &str) -> &'a mut ProfilerEvent {
|
||||
if self.event_pool.is_empty() {
|
||||
for _ in 0..128 {
|
||||
if let Ok(e) = ProfilerEvent::new() {
|
||||
self.event_pool.push(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if self.pool_offset >= self.event_pool.len() {
|
||||
self.pool_offset = 0;
|
||||
}
|
||||
|
||||
let idx = self.pool_offset;
|
||||
self.pool_offset += 1;
|
||||
|
||||
let pe = &mut self.event_pool[idx];
|
||||
|
||||
if pe.active {
|
||||
pe.sync();
|
||||
}
|
||||
|
||||
pe.active = true;
|
||||
pe.stats = None;
|
||||
|
||||
let mut found = None;
|
||||
for s in &self.kernel_stats {
|
||||
if s.lock().description == description {
|
||||
found = Some(s.clone());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if found.is_none() {
|
||||
let new_stats = Arc::new(Mutex::new(KernelStats::new(description)));
|
||||
self.kernel_stats.push(new_stats.clone());
|
||||
found = Some(new_stats);
|
||||
}
|
||||
|
||||
pe.stats = found;
|
||||
pe
|
||||
}
|
||||
}
|
||||
|
||||
pub struct GpuState {
|
||||
pub context: Context,
|
||||
pub stream: Stream,
|
||||
profiler: Profiler,
|
||||
}
|
||||
|
||||
impl GpuState {
|
||||
fn init(device_index: u32) -> Result<Self, Box<dyn Error>> {
|
||||
match cust::init(CudaFlags::empty()) {
|
||||
Ok(_) => {}
|
||||
Err(cust::error::CudaError::SharedObjectInitFailed) => {}
|
||||
Err(e) => return Err(Box::new(e)),
|
||||
}
|
||||
|
||||
let device = Device::get_device(device_index)?;
|
||||
let name = device.name().unwrap_or_else(|_| "Unknown".into());
|
||||
|
||||
log::info!("Selected GPU: {}", name);
|
||||
|
||||
let has_unified = device
|
||||
.get_attribute(DeviceAttribute::UnifiedAddressing)
|
||||
.unwrap_or(0);
|
||||
if has_unified == 0 {
|
||||
panic!("Selected GPU does not support unified addressing.");
|
||||
}
|
||||
|
||||
let context = Context::new(device)?;
|
||||
|
||||
CurrentContext::set_resource_limit(ResourceLimit::StackSize, 8192)?;
|
||||
CurrentContext::set_resource_limit(ResourceLimit::PrintfFifoSize, 32 * 1024 * 1024)?;
|
||||
CurrentContext::set_cache_config(CacheConfig::PreferL1)?;
|
||||
|
||||
let stream = Stream::new(StreamFlags::DEFAULT, None)?;
|
||||
|
||||
Ok(Self {
|
||||
context,
|
||||
stream,
|
||||
profiler: Profiler::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
pub static ref GPU_STATE: Mutex<Option<GpuState>> = Mutex::new(None);
|
||||
}
|
||||
|
||||
pub fn gpu_init() {
|
||||
if !get_options().use_gpu {
|
||||
return;
|
||||
}
|
||||
|
||||
let device_id = get_options().gpu_device.unwrap_or(0);
|
||||
log::info!("Initializing GPU Device {}", device_id);
|
||||
|
||||
match GpuState::init(device_id) {
|
||||
Ok(state) => {
|
||||
#[cfg(feature = "use_nvtx")]
|
||||
nvtx::name_thread("MAIN_THREAD");
|
||||
*GPU_STATE.lock() = Some(state);
|
||||
}
|
||||
Err(e) => {
|
||||
panic!("Failed to initialize GPU: {:?}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn gpu_thread_init() {
|
||||
if let Some(state) = GPU_STATE.lock().as_ref() {
|
||||
if let Err(e) = CurrentContext::set_current(&state.context) {
|
||||
log::error!("Failed to set CUDA context for thread: {:?}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const SANITY_PTX: &str = r#"
|
||||
.version 6.5
|
||||
.target sm_30
|
||||
.address_size 64
|
||||
.visible .entry scale_array_kernel(.param .u64 ptr, .param .u32 n, .param .f32 scale) {
|
||||
.reg .u64 %ptr; .reg .u32 %n; .reg .f32 %scale;
|
||||
.reg .u32 %tid; .reg .u32 %ntid; .reg .u32 %ctid; .reg .u32 %idx;
|
||||
.reg .pred %p; .reg .u64 %addr; .reg .f32 %val;
|
||||
|
||||
ld.param.u64 %ptr, [ptr];
|
||||
ld.param.u32 %n, [n];
|
||||
ld.param.f32 %scale, [scale];
|
||||
|
||||
mov.u32 %tid, %tid.x;
|
||||
mov.u32 %ntid, %ntid.x;
|
||||
mov.u32 %ctid, %ctaid.x;
|
||||
mad.lo.s32 %idx, %ctid, %ntid, %tid;
|
||||
|
||||
setp.ge.u32 %p, %idx, %n;
|
||||
@%p ret;
|
||||
|
||||
mul.wide.u32 %addr, %idx, 4;
|
||||
add.u64 %addr, %ptr, %addr;
|
||||
ld.global.f32 %val, [%addr];
|
||||
mul.f32 %val, %val, %scale;
|
||||
st.global.f32 [%addr], %val;
|
||||
}
|
||||
"#;
|
||||
|
||||
pub fn launch_scale_kernel(ptr: *mut f32, len: usize, scale: f32) -> Result<(), Box<dyn Error>> {
|
||||
// Note: quick_init works best for isolated tests.
|
||||
// If the main engine runs, this might conflict, but for the sanity test it is correct.
|
||||
let _ctx = cust::quick_init()?;
|
||||
|
||||
let module = Module::from_str(SANITY_PTX)?;
|
||||
let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
|
||||
let mut kernel = module.get_function("scale_array_kernel")?;
|
||||
|
||||
let block_size = 256;
|
||||
let grid_size = (len as u32 + block_size - 1) / block_size;
|
||||
|
||||
unsafe {
|
||||
launch!(
|
||||
kernel<<<grid_size, block_size, 0, stream>>>(
|
||||
ptr as u64,
|
||||
len as u32,
|
||||
scale
|
||||
)
|
||||
)?;
|
||||
}
|
||||
stream.synchronize()?;
|
||||
Ok(())
|
||||
}
|
||||
534
src/gpu/memory.rs
Normal file
534
src/gpu/memory.rs
Normal file
|
|
@ -0,0 +1,534 @@
|
|||
#![allow(clippy::too_many_arguments)]
|
||||
use super::Float4;
|
||||
use crate::Float;
|
||||
use crate::core::geometry::{Normal3f, Point2f, Point2i, Point3f, Point3fi, Ray, Vector3f};
|
||||
use crate::lights::LightSampleContext;
|
||||
use crate::spectra::{SampledSpectrum, SampledWavelengths};
|
||||
// use cust::memory::{CopyDestination, DeviceMemory};
|
||||
// use cust::prelude::*;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! soa_struct {
|
||||
(
|
||||
$(#[$outer:meta])*
|
||||
pub struct $name:ident {
|
||||
$(
|
||||
pub $field:ident : $type:ty
|
||||
),* $(,)?
|
||||
}
|
||||
) => {
|
||||
#[cfg(feature = "use_gpu")]
|
||||
$(#[$outer])*
|
||||
pub struct $name {
|
||||
capacity: u32,
|
||||
pub count: cust::memory::DeviceBuffer<u32>,
|
||||
$(
|
||||
pub $field: cust::memory::DeviceBuffer<$type>,
|
||||
)*
|
||||
}
|
||||
|
||||
#[cfg(feature = "use_gpu")]
|
||||
impl $name {
|
||||
pub fn new(capacity: usize) -> cust::error::CudaResult<Self> {
|
||||
use cust::memory::DeviceBuffer;
|
||||
Ok(Self {
|
||||
capacity: capacity as u32,
|
||||
count: DeviceBuffer::zeroed(1)?,
|
||||
$(
|
||||
$field: DeviceBuffer::zeroed(capacity)?,
|
||||
)*
|
||||
})
|
||||
}
|
||||
|
||||
pub fn len(&self) -> cust::error::CudaResult<u32> {
|
||||
let mut host_count = [0u32; 1];
|
||||
self.count.copy_to(&mut host_count)?;
|
||||
Ok(host_count[0])
|
||||
}
|
||||
|
||||
pub fn reset(&mut self) -> cust::error::CudaResult<()> {
|
||||
self.count.copy_from(&[0])
|
||||
}
|
||||
|
||||
// Generate the View name
|
||||
pub fn as_view(&mut self) -> paste::paste! { [<$name View>] } {
|
||||
paste::paste! {
|
||||
[<$name View>] {
|
||||
capacity: self.capacity,
|
||||
count: self.count.as_device_ptr().as_mut_ptr(),
|
||||
$(
|
||||
$field: self.$field.as_device_ptr().as_raw() as *mut $type,
|
||||
)*
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
paste::paste! {
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct [<$name View>] {
|
||||
pub capacity: u32,
|
||||
pub count: *mut u32,
|
||||
$(
|
||||
pub $field: *mut $type,
|
||||
)*
|
||||
}
|
||||
|
||||
unsafe impl cust::memory::DeviceCopy for [<$name View>] {}
|
||||
|
||||
impl [<$name View>] {
|
||||
// The raw push that fills every field
|
||||
#[cfg(feature = "use_gpu")]
|
||||
pub unsafe fn push(&self, $( $field : $type ),* ) -> Option<u32> {
|
||||
use core::sync::atomic::{AtomicU32, Ordering};
|
||||
|
||||
let index = unsafe {
|
||||
let counter_ptr = self.count as *mut AtomicU32;
|
||||
(*counter_ptr).fetch_add(1, Ordering::Relaxed)
|
||||
};
|
||||
|
||||
if index >= self.capacity {
|
||||
return None;
|
||||
}
|
||||
|
||||
unsafe {
|
||||
$(
|
||||
*self.$field.add(index as usize) = $field;
|
||||
)*
|
||||
}
|
||||
|
||||
Some(index)
|
||||
}
|
||||
|
||||
#[cfg(feature = "use_gpu")]
|
||||
pub unsafe fn size(&self) -> u32 {
|
||||
use core::sync::atomic::{AtomicU32, Ordering};
|
||||
unsafe {
|
||||
(*(self.count as *const AtomicU32)).load(Ordering::Relaxed)
|
||||
}
|
||||
}
|
||||
|
||||
$(
|
||||
#[cfg(feature = "use_gpu")]
|
||||
pub fn [<$field _ptr>](&self) -> *mut $type {
|
||||
self.$field
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy, Default)]
|
||||
pub struct RaySamplesDirect {
|
||||
pub u: Point2f,
|
||||
pub uc: Float,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy, Default)]
|
||||
pub struct RaySamplesIndirect {
|
||||
pub uc: Float,
|
||||
pub rr: Float,
|
||||
pub u: Point2f,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy, Default)]
|
||||
pub struct RaySamplesSubsurface {
|
||||
pub uc: Float,
|
||||
pub u: Point2f,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy, Default)]
|
||||
pub struct RaySamples {
|
||||
pub direct: RaySamplesDirect,
|
||||
pub indirect: RaySamplesIndirect,
|
||||
pub have_subsurface: bool,
|
||||
pub subsurface: RaySamplesSubsurface,
|
||||
}
|
||||
|
||||
soa_struct! {
|
||||
pub struct RayQueue {
|
||||
pub ray_o: Point3f,
|
||||
pub ray_d: Vector3f,
|
||||
|
||||
pub depth: i32,
|
||||
pub lambda: SampledWavelengths,
|
||||
pub pixel_index: u32,
|
||||
|
||||
pub beta: SampledSpectrum,
|
||||
pub r_u: SampledSpectrum,
|
||||
pub r_l: SampledSpectrum,
|
||||
|
||||
pub ctx_pi: Point3f,
|
||||
pub ctx_n: Normal3f,
|
||||
pub ctx_ns: Normal3f,
|
||||
|
||||
pub eta_scale: Float,
|
||||
pub specular_bounce: u32,
|
||||
pub any_non_specular_bounces: u32,
|
||||
}
|
||||
}
|
||||
|
||||
soa_struct! {
|
||||
pub struct PixelSampleStateStorage {
|
||||
pub p_pixel: Point2i,
|
||||
pub l: SampledSpectrum,
|
||||
pub lambda: SampledWavelengths,
|
||||
pub filter_weight: Float,
|
||||
pub visible_surface: u32,
|
||||
pub camera_ray_weight: SampledSpectrum,
|
||||
|
||||
pub rs_direct_packed: Float4,
|
||||
pub rs_indirect_packed: Float4,
|
||||
pub rs_subsurface_packed: Float4,
|
||||
}
|
||||
}
|
||||
|
||||
soa_struct! {
|
||||
pub struct EscapedRayQueue {
|
||||
pub ray_o: Point3f,
|
||||
pub ray_d: Vector3f,
|
||||
pub depth: i32,
|
||||
pub lambda: SampledWavelengths,
|
||||
pub pixel_index: u32,
|
||||
pub beta: SampledSpectrum,
|
||||
pub specular_bounce: u32,
|
||||
pub r_u: SampledSpectrum,
|
||||
pub r_l: SampledSpectrum,
|
||||
pub ctx_pi: Point3f,
|
||||
pub ctx_n: Normal3f,
|
||||
pub ctx_ns: Normal3f,
|
||||
}
|
||||
}
|
||||
|
||||
soa_struct! {
|
||||
pub struct HitAreaLightQueue {
|
||||
pub area_light_id: u32, // Light ID
|
||||
pub p: Point3f,
|
||||
pub n: Normal3f,
|
||||
pub uv: Point2f,
|
||||
pub wo: Vector3f,
|
||||
pub lambda: SampledWavelengths,
|
||||
pub depth: i32,
|
||||
pub beta: SampledSpectrum,
|
||||
pub r_u: SampledSpectrum,
|
||||
pub r_l: SampledSpectrum,
|
||||
pub ctx_pi: Point3f,
|
||||
pub ctx_n: Normal3f,
|
||||
pub ctx_ns: Normal3f,
|
||||
pub specular_bounce: u32,
|
||||
pub pixel_index: u32,
|
||||
}
|
||||
}
|
||||
|
||||
soa_struct! {
|
||||
pub struct ShadowRayQueue {
|
||||
pub ray_o: Point3f,
|
||||
pub ray_d: Vector3f,
|
||||
pub t_max: Float,
|
||||
pub lambda: SampledWavelengths,
|
||||
pub ld: SampledSpectrum,
|
||||
pub r_u: SampledSpectrum,
|
||||
pub r_l: SampledSpectrum,
|
||||
pub pixel_index: u32,
|
||||
}
|
||||
}
|
||||
|
||||
soa_struct! {
|
||||
pub struct GetBSSRDFAndProbeRayQueue {
|
||||
pub material_id: u32,
|
||||
pub lambda: SampledWavelengths,
|
||||
pub beta: SampledSpectrum,
|
||||
pub r_u: SampledSpectrum,
|
||||
pub p: Point3f,
|
||||
pub wo: Vector3f,
|
||||
pub n: Normal3f,
|
||||
pub ns: Normal3f,
|
||||
pub dpdus: Vector3f,
|
||||
pub uv: Point2f,
|
||||
pub depth: i32,
|
||||
pub mi_inside: u32,
|
||||
pub mi_outside: u32,
|
||||
pub eta_scale: Float,
|
||||
pub pixel_index: u32,
|
||||
}
|
||||
}
|
||||
|
||||
soa_struct! {
|
||||
pub struct SubsurfaceScatterQueue {
|
||||
pub p0: Point3f,
|
||||
pub p1: Point3f,
|
||||
pub depth: i32,
|
||||
pub material_id: u32,
|
||||
pub lambda: SampledWavelengths,
|
||||
pub beta: SampledSpectrum,
|
||||
pub r_u: SampledSpectrum,
|
||||
pub mi_inside: u32,
|
||||
pub mi_outside: u32,
|
||||
pub eta_scale: Float,
|
||||
pub pixel_index: u32,
|
||||
}
|
||||
}
|
||||
|
||||
soa_struct! {
|
||||
pub struct MediumSampleQueue {
|
||||
pub ray_o: Point3f,
|
||||
pub ray_d: Vector3f,
|
||||
pub t_max: Float,
|
||||
pub lambda: SampledWavelengths,
|
||||
pub beta: SampledSpectrum,
|
||||
pub r_u: SampledSpectrum,
|
||||
pub r_l: SampledSpectrum,
|
||||
pub pixel_index: u32,
|
||||
|
||||
pub ctx_pi: Point3f,
|
||||
pub ctx_n: Normal3f,
|
||||
pub ctx_ns: Normal3f,
|
||||
|
||||
pub specular_bounce: u32,
|
||||
pub any_non_specular_bounces: u32,
|
||||
pub eta_scale: Float,
|
||||
|
||||
pub area_light_id: u32,
|
||||
pub pi: Point3fi,
|
||||
pub n: Normal3f,
|
||||
pub dpdu: Vector3f,
|
||||
pub dpdv: Vector3f,
|
||||
pub wo: Vector3f,
|
||||
pub uv: Point2f,
|
||||
pub material_id: u32,
|
||||
pub ns: Normal3f,
|
||||
pub dpdus: Vector3f,
|
||||
pub dpdvs: Vector3f,
|
||||
pub dndus: Normal3f,
|
||||
pub dndvs: Normal3f,
|
||||
pub face_index: i32,
|
||||
pub mi_inside: u32,
|
||||
pub mi_outside: u32,
|
||||
}
|
||||
}
|
||||
|
||||
soa_struct! {
|
||||
pub struct MaterialEvalQueue {
|
||||
pub material_id: u32,
|
||||
pub pi: Point3fi,
|
||||
pub n: Normal3f,
|
||||
pub dpdu: Vector3f,
|
||||
pub dpdv: Vector3f,
|
||||
pub time: Float,
|
||||
pub depth: i32,
|
||||
pub ns: Normal3f,
|
||||
pub dpdus: Vector3f,
|
||||
pub dpdvs: Vector3f,
|
||||
pub dndus: Normal3f,
|
||||
pub dndvs: Normal3f,
|
||||
pub uv: Point2f,
|
||||
pub face_index: i32,
|
||||
pub lambda: SampledWavelengths,
|
||||
pub pixel_index: u32,
|
||||
pub any_non_specular_bounces: u32,
|
||||
pub wo: Vector3f,
|
||||
pub beta: SampledSpectrum,
|
||||
pub r_u: SampledSpectrum,
|
||||
pub eta_scale: Float,
|
||||
pub mi_inside: u32,
|
||||
pub mi_outside: u32,
|
||||
}
|
||||
}
|
||||
|
||||
soa_struct! {
|
||||
pub struct MediumScatterQueue {
|
||||
pub p: Point3f,
|
||||
pub depth: usize,
|
||||
pub lambda: SampledWavelengths,
|
||||
pub beta: SampledSpectrum,
|
||||
pub r_u: SampledSpectrum,
|
||||
pub wo: Vector3f,
|
||||
pub time: Float,
|
||||
pub eta_scale: Float,
|
||||
pub pixel_index: usize,
|
||||
|
||||
// ID
|
||||
pub phase_function: u32,
|
||||
pub medium: u32,
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct RayWorkItem {
|
||||
pub ray: Ray,
|
||||
pub depth: i32,
|
||||
pub lambda: SampledWavelengths,
|
||||
pub pixel_index: u32,
|
||||
pub beta: SampledSpectrum,
|
||||
pub r_u: SampledSpectrum,
|
||||
pub r_l: SampledSpectrum,
|
||||
pub prev_intr_ctx: LightSampleContext,
|
||||
pub eta_scale: Float,
|
||||
pub specular_bounce: bool,
|
||||
pub any_non_specular_bounces: bool,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct EscapedRayWorkItem {
|
||||
pub ray_o: Point3f,
|
||||
pub ray_d: Vector3f,
|
||||
pub depth: i32,
|
||||
pub lambda: SampledWavelengths,
|
||||
pub pixel_index: u32,
|
||||
pub beta: SampledSpectrum,
|
||||
pub specular_bounce: bool,
|
||||
pub r_u: SampledSpectrum,
|
||||
pub r_l: SampledSpectrum,
|
||||
pub prev_intr_ctx: LightSampleContext,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct ShadowRayWorkItem {
|
||||
pub ray: Ray,
|
||||
pub t_max: Float,
|
||||
pub lambda: SampledWavelengths,
|
||||
pub ld: SampledSpectrum,
|
||||
pub r_u: SampledSpectrum,
|
||||
pub r_l: SampledSpectrum,
|
||||
pub pixel_index: u32,
|
||||
}
|
||||
|
||||
impl RayQueueView {
|
||||
#[cfg(feature = "use_gpu")]
|
||||
pub unsafe fn push_work_item(&self, item: RayWorkItem) -> Option<u32> {
|
||||
unsafe {
|
||||
self.push(
|
||||
item.ray.o,
|
||||
item.ray.d,
|
||||
item.depth,
|
||||
item.lambda,
|
||||
item.pixel_index,
|
||||
item.beta,
|
||||
item.r_u,
|
||||
item.r_l,
|
||||
item.prev_intr_ctx.pi.into(),
|
||||
item.prev_intr_ctx.n,
|
||||
item.prev_intr_ctx.ns,
|
||||
item.eta_scale,
|
||||
if item.specular_bounce { 1 } else { 0 },
|
||||
if item.any_non_specular_bounces { 1 } else { 0 },
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl EscapedRayQueueView {
|
||||
#[cfg(feature = "use_gpu")]
|
||||
pub unsafe fn push_work_item(&self, r: &RayWorkItem) -> Option<u32> {
|
||||
unsafe {
|
||||
self.push(
|
||||
r.ray.o,
|
||||
r.ray.d,
|
||||
r.depth,
|
||||
r.lambda,
|
||||
r.pixel_index,
|
||||
r.beta,
|
||||
if r.specular_bounce { 1 } else { 0 },
|
||||
r.r_u,
|
||||
r.r_l,
|
||||
r.prev_intr_ctx.pi.into(),
|
||||
r.prev_intr_ctx.n,
|
||||
r.prev_intr_ctx.ns,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PixelSampleStateStorageView {
|
||||
#[cfg(feature = "use_gpu")]
|
||||
pub unsafe fn get_samples(&self, index: u32) -> RaySamples {
|
||||
let i = index as usize;
|
||||
|
||||
let (dir, ind, ss) = unsafe {
|
||||
(
|
||||
*self.rs_direct_packed.add(i),
|
||||
*self.rs_indirect_packed.add(i),
|
||||
*self.rs_subsurface_packed.add(i),
|
||||
)
|
||||
};
|
||||
|
||||
let direct_u = Point2f::new(dir.v[0], dir.v[1]);
|
||||
let direct_uc = dir.v[2];
|
||||
let flags = dir.v[3] as i32;
|
||||
let have_subsurface = (flags & 1) != 0;
|
||||
|
||||
let indirect_uc = ind.v[0];
|
||||
let indirect_rr = ind.v[1];
|
||||
let indirect_u = Point2f::new(ind.v[2], ind.v[3]);
|
||||
|
||||
let subsurface_uc = ss.v[0];
|
||||
let subsurface_u = Point2f::new(ss.v[1], ss.v[2]);
|
||||
|
||||
RaySamples {
|
||||
direct: RaySamplesDirect {
|
||||
u: direct_u,
|
||||
uc: direct_uc,
|
||||
},
|
||||
indirect: RaySamplesIndirect {
|
||||
uc: indirect_uc,
|
||||
rr: indirect_rr,
|
||||
u: indirect_u,
|
||||
},
|
||||
have_subsurface,
|
||||
subsurface: RaySamplesSubsurface {
|
||||
uc: subsurface_uc,
|
||||
u: subsurface_u,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "use_gpu")]
|
||||
pub unsafe fn set_samples(&self, index: u32, rs: RaySamples) {
|
||||
if index >= self.capacity {
|
||||
return;
|
||||
}
|
||||
let i = index as usize;
|
||||
|
||||
let flags = if rs.have_subsurface { 1.0 } else { 0.0 };
|
||||
let dir = Float4 {
|
||||
v: [rs.direct.u.0[0], rs.direct.u.0[1], rs.direct.uc, flags],
|
||||
};
|
||||
|
||||
let ind = Float4 {
|
||||
v: [
|
||||
rs.indirect.uc,
|
||||
rs.indirect.rr,
|
||||
rs.indirect.u.0[0],
|
||||
rs.indirect.u.0[1],
|
||||
],
|
||||
};
|
||||
|
||||
unsafe {
|
||||
*self.rs_direct_packed.add(i) = dir;
|
||||
*self.rs_indirect_packed.add(i) = ind;
|
||||
}
|
||||
|
||||
if rs.have_subsurface {
|
||||
let ss = Float4 {
|
||||
v: [
|
||||
rs.subsurface.uc,
|
||||
rs.subsurface.u.0[0],
|
||||
rs.subsurface.u.0[1],
|
||||
0.0,
|
||||
],
|
||||
};
|
||||
unsafe {
|
||||
*self.rs_subsurface_packed.add(i) = ss;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
5
src/gpu/mod.rs
Normal file
5
src/gpu/mod.rs
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
pub mod driver;
|
||||
pub mod memory;
|
||||
pub mod wavefront;
|
||||
|
||||
pub use driver::launch_scale_kernel;
|
||||
45
src/gpu/wavefront/mod.rs
Normal file
45
src/gpu/wavefront/mod.rs
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
// use crate::core::scene::BasicScene;
|
||||
use crate::{
|
||||
EscapedRayQueue, GetBSSRDFAndProbeRayQueue, HitAreaLightQueue, MaterialEvalQueue,
|
||||
MediumSampleQueue, MediumScatterQueue, PixelSampleStateStorage, RayQueue, ShadowRayQueue,
|
||||
SubsurfaceScatterQueue,
|
||||
};
|
||||
use shared::core::camera::Camera;
|
||||
use shared::core::film::Film;
|
||||
use shared::core::filter::Filter;
|
||||
use shared::core::light::Light;
|
||||
use shared::core::sampler::Sampler;
|
||||
use shared::lights::LightSampler;
|
||||
use std::sync::Arc;
|
||||
|
||||
pub struct WavefrontPathIntegrator {
|
||||
pub film: Film,
|
||||
pub filter: Filter,
|
||||
pub sampler: Sampler,
|
||||
pub camera: Arc<Camera>,
|
||||
pub light_sampler: LightSampler,
|
||||
pub infinite_lights: Option<Vec<Arc<Light>>>,
|
||||
pub max_depth: i32,
|
||||
pub samples_per_pixel: i32,
|
||||
pub regularize: bool,
|
||||
pub scanlines_per_pixel: i32,
|
||||
pub max_queue_size: i32,
|
||||
pub pixel_sample_state: PixelSampleStateStorage,
|
||||
pub ray_queue: [RayQueue; 2],
|
||||
pub hit_area_light_queue: HitAreaLightQueue,
|
||||
pub shadow_ray_queue: ShadowRayQueue,
|
||||
pub escaped_ray_queue: Option<EscapedRayQueue>,
|
||||
pub basic_material_queue: Option<MaterialEvalQueue>,
|
||||
pub universal_material_queue: Option<MaterialEvalQueue>,
|
||||
pub medium_sample_queue: Option<MediumSampleQueue>,
|
||||
pub medium_scatter_queue: Option<MediumScatterQueue>,
|
||||
pub bssrf_queue: Option<GetBSSRDFAndProbeRayQueue>,
|
||||
pub subsurface_queue: Option<SubsurfaceScatterQueue>,
|
||||
}
|
||||
|
||||
#[cfg(feature = "use_gpu")]
|
||||
impl WavefrontPathIntegrator {
|
||||
pub fn new(scene: BasicScene) -> Self {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
Loading…
Reference in a new issue