diff --git a/shared/src/shapes/mesh.rs b/shared/src/shapes/mesh.rs
new file mode 100644
index 0000000..cb8aae4
--- /dev/null
+++ b/shared/src/shapes/mesh.rs
@@ -0,0 +1,211 @@
+//! Triangle and bilinear-patch mesh containers backed by `GVec` buffers.
+//!
+//! NOTE(review): the generic arguments in this file were reconstructed from
+//! how each field is filled in the constructors; confirm them against the
+//! rest of the crate.
+
+use crate::core::geometry::{Normal3f, Point2f, Point3f, Vector3f};
+use crate::utils::alloc::{gvec_from_slice, gvec_with_capacity, GVec};
+use crate::utils::sampling::DevicePiecewiseConstant2D;
+use crate::utils::Transform;
+use crate::{Float, Ptr};
+
+/// Triangle mesh whose positions, normals and tangents have already been
+/// transformed by the `render_from_object` passed to [`TriangleMesh::new`].
+///
+/// `n`, `s`, `uv` and `face_indices` are empty when the caller supplied no
+/// data for them.
+// `Copy` is deliberately not derived: the `GVec` fields own their buffers.
+#[repr(C)]
+#[derive(Debug, Clone)]
+pub struct TriangleMesh {
+    pub p: GVec<Point3f>,
+    pub n: GVec<Normal3f>,
+    pub s: GVec<Vector3f>,
+    pub uv: GVec<Point2f>,
+    pub vertex_indices: GVec<i32>,
+    pub face_indices: GVec<i32>,
+    pub n_triangles: u32,
+    pub n_vertices: u32,
+    pub reverse_orientation: bool,
+    pub transform_swaps_handedness: bool,
+}
+
+/// Mesh of bilinear patches; `n_patches` counts four vertex indices per patch.
+///
+/// `n` and `uv` are empty when the caller supplied no data for them.
+// `Copy` is deliberately not derived: the `GVec` fields own their buffers.
+#[repr(C)]
+#[derive(Debug, Clone)]
+pub struct BilinearPatchMesh {
+    pub p: GVec<Point3f>,
+    pub n: GVec<Normal3f>,
+    pub uv: GVec<Point2f>,
+    pub vertex_indices: GVec<i32>,
+    pub n_patches: u32,
+    pub n_vertices: u32,
+    pub reverse_orientation: bool,
+    pub transform_swaps_handedness: bool,
+    pub image_distribution: Ptr<DevicePiecewiseConstant2D>,
+}
+
+// SAFETY: NOTE(review) - these impls promise that both mesh types may be moved
+// to and shared between threads. Nothing in this file mutates a mesh after
+// construction, but the guarantee also depends on `GVec` and `Ptr`; confirm.
+unsafe impl Send for TriangleMesh {}
+unsafe impl Sync for TriangleMesh {}
+unsafe impl Send for BilinearPatchMesh {}
+unsafe impl Sync for BilinearPatchMesh {}
+
+/// Copies `p` into a new `GVec`, applying `render_from_object` to each point.
+fn transform_points(render_from_object: &Transform, p: &[Point3f]) -> GVec<Point3f> {
+    let mut out: GVec<Point3f> = gvec_with_capacity(p.len());
+    out.extend(p.iter().map(|&pt| render_from_object.apply_to_point(pt)));
+    out
+}
+
+/// Copies `n` into a new `GVec`, applying `render_from_object` to each normal
+/// and negating the result when `reverse_orientation` is set.
+fn transform_normals(
+    render_from_object: &Transform,
+    reverse_orientation: bool,
+    n: &[Normal3f],
+) -> GVec<Normal3f> {
+    let mut out: GVec<Normal3f> = gvec_with_capacity(n.len());
+    out.extend(n.iter().map(|&nn| {
+        let transformed = render_from_object.apply_to_normal(nn);
+        if reverse_orientation {
+            -transformed
+        } else {
+            transformed
+        }
+    }));
+    out
+}
+
+impl TriangleMesh {
+    /// Builds a mesh from object-space attributes.
+    ///
+    /// Positions, normals and tangents are transformed with
+    /// `render_from_object`; normals are also negated when
+    /// `reverse_orientation` is set. `n`, `s`, `uv` and `face_indices` may be
+    /// empty. The triangle count is `vertex_indices.len() / 3`, rounded down.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a non-empty `n`, `s` or `uv` does not hold one entry per
+    /// vertex, or a non-empty `face_indices` does not hold one entry per
+    /// triangle.
+    pub fn new(
+        render_from_object: &Transform,
+        reverse_orientation: bool,
+        vertex_indices: &[i32],
+        p: &[Point3f],
+        n: &[Normal3f],
+        s: &[Vector3f],
+        uv: &[Point2f],
+        face_indices: &[i32],
+    ) -> Self {
+        let n_triangles = (vertex_indices.len() / 3) as u32;
+        let n_vertices = p.len() as u32;
+
+        if !n.is_empty() {
+            assert_eq!(n_vertices as usize, n.len(), "Normal count mismatch");
+        }
+        if !s.is_empty() {
+            assert_eq!(n_vertices as usize, s.len(), "Tangent count mismatch");
+        }
+        assert!(
+            uv.is_empty() || uv.len() == n_vertices as usize,
+            "UV count mismatch"
+        );
+        assert!(
+            face_indices.is_empty() || face_indices.len() == n_triangles as usize,
+            "Face index count mismatch"
+        );
+
+        let mut s_gvec: GVec<Vector3f> = gvec_with_capacity(s.len());
+        s_gvec.extend(s.iter().map(|&ss| render_from_object.apply_to_vector(ss)));
+
+        Self {
+            vertex_indices: gvec_from_slice(vertex_indices),
+            p: transform_points(render_from_object, p),
+            n: transform_normals(render_from_object, reverse_orientation, n),
+            s: s_gvec,
+            uv: gvec_from_slice(uv),
+            face_indices: gvec_from_slice(face_indices),
+            n_triangles,
+            n_vertices,
+            reverse_orientation,
+            transform_swaps_handedness: render_from_object.swaps_handedness(),
+        }
+    }
+
+    /// Returns the transformed vertex positions.
+    pub fn positions(&self) -> &[Point3f] {
+        &self.p
+    }
+
+    /// Returns the vertex indices, three per triangle.
+    pub fn indices(&self) -> &[i32] {
+        &self.vertex_indices
+    }
+
+    /// Returns the transformed normals; empty if none were supplied.
+    pub fn normals(&self) -> &[Normal3f] {
+        &self.n
+    }
+
+    /// Returns the texture coordinates; empty if none were supplied.
+    pub fn uvs(&self) -> &[Point2f] {
+        &self.uv
+    }
+}
+
+impl BilinearPatchMesh {
+    /// Builds a patch mesh from object-space attributes.
+    ///
+    /// Positions and normals are transformed with `render_from_object`;
+    /// normals are also negated when `reverse_orientation` is set. `n` and
+    /// `uv` may be empty. The patch count is `vertex_indices.len() / 4`,
+    /// rounded down.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a non-empty `n` or `uv` does not hold one entry per vertex.
+    // NOTE(review): the type argument of `image_distribution` was lost in the
+    // flattened patch; `Option<&DevicePiecewiseConstant2D>` is assumed from
+    // the import and the `Ptr::from` call. Confirm against `Ptr`'s `From` impls.
+    pub fn new(
+        render_from_object: &Transform,
+        reverse_orientation: bool,
+        vertex_indices: &[i32],
+        p: &[Point3f],
+        n: &[Normal3f],
+        uv: &[Point2f],
+        image_distribution: Option<&DevicePiecewiseConstant2D>,
+    ) -> Self {
+        let n_patches = (vertex_indices.len() / 4) as u32;
+        let n_vertices = p.len() as u32;
+
+        if !n.is_empty() {
+            assert_eq!(n_vertices as usize, n.len(), "Normal count mismatch");
+        }
+        assert!(
+            uv.is_empty() || uv.len() == n_vertices as usize,
+            "UV count mismatch"
+        );
+
+        Self {
+            vertex_indices: gvec_from_slice(vertex_indices),
+            p: transform_points(render_from_object, p),
+            n: transform_normals(render_from_object, reverse_orientation, n),
+            uv: gvec_from_slice(uv),
+            image_distribution: Ptr::from(image_distribution),
+            n_patches,
+            n_vertices,
+            reverse_orientation,
+            transform_swaps_handedness: render_from_object.swaps_handedness(),
+        }
+    }
+}
diff --git a/shared/src/utils/alloc.rs b/shared/src/utils/alloc.rs
new file mode 100644
index 0000000..c6db9a1
--- /dev/null
+++ b/shared/src/utils/alloc.rs
@@ -0,0 +1,251 @@
+//! Allocators backing `GVec` and `GBox`, selected at compile time by feature.
+//!
+//! The crate root must enable `#![feature(allocator_api)]`; `feature` is a
+//! crate-level attribute and has no effect when written in a submodule.
+extern crate alloc;
+
+use alloc::alloc::Global;
+use core::alloc::{AllocError, Allocator, Layout};
+use core::ptr::NonNull;
+
+/// CPU fallback that forwards every request to `Global`.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct SystemAlloc;
+
+// SAFETY: every method forwards its arguments unchanged to `Global`, so the
+// `Allocator` contract is exactly the one `Global` already upholds.
+unsafe impl Allocator for SystemAlloc {
+    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
+        Global.allocate(layout)
+    }
+
+    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
+        Global.deallocate(ptr, layout)
+    }
+
+    unsafe fn grow(
+        &self,
+        ptr: NonNull<u8>,
+        old_layout: Layout,
+        new_layout: Layout,
+    ) -> Result<NonNull<[u8]>, AllocError> {
+        Global.grow(ptr, old_layout, new_layout)
+    }
+
+    unsafe fn shrink(
+        &self,
+        ptr: NonNull<u8>,
+        old_layout: Layout,
+        new_layout: Layout,
+    ) -> Result<NonNull<[u8]>, AllocError> {
+        Global.shrink(ptr, old_layout, new_layout)
+    }
+}
+
+/// Unified memory obtained through `cuda_malloc_unified`.
+#[cfg(feature = "cuda")]
+pub mod cuda {
+    use super::*;
+    use cust::memory::{cuda_free_unified, cuda_malloc_unified, UnifiedPointer};
+
+    /// Allocator handing out blocks from `cuda_malloc_unified`.
+    #[derive(Debug, Clone, Copy, Default)]
+    pub struct CudaAlloc;
+
+    // SAFETY: NOTE(review) - soundness relies on `cuda_malloc_unified`
+    // returning blocks that stay valid until `cuda_free_unified` is called;
+    // confirm against the `cust` documentation.
+    unsafe impl Allocator for CudaAlloc {
+        fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
+            if layout.size() == 0 {
+                // Zero-sized request: hand back a dangling pointer whose
+                // address equals the alignment, with zero length.
+                let ptr = NonNull::new(layout.align() as *mut u8).ok_or(AllocError)?;
+                return Ok(NonNull::slice_from_raw_parts(ptr, 0));
+            }
+
+            let ptr = cuda_malloc_unified::<u8>(layout.size()).map_err(|_| AllocError)?;
+
+            let raw = ptr.as_raw_mut();
+            // From here on the raw pointer is tracked, not the wrapper.
+            core::mem::forget(ptr);
+
+            let nn = NonNull::new(raw).ok_or(AllocError)?;
+            // The request above carries no alignment, so verify it afterwards
+            // instead of returning a block that violates `layout`.
+            if (raw as usize) % layout.align() != 0 {
+                // SAFETY: `raw` was just returned by `cuda_malloc_unified`
+                // and has not been handed to anyone else.
+                let _ = unsafe { cuda_free_unified(UnifiedPointer::wrap(raw)) };
+                return Err(AllocError);
+            }
+            Ok(NonNull::slice_from_raw_parts(nn, layout.size()))
+        }
+
+        unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
+            // Zero-sized blocks were never allocated; see `allocate`.
+            if layout.size() == 0 {
+                return;
+            }
+            // Errors are dropped: `deallocate` has no way to report them.
+            let _ = cuda_free_unified(UnifiedPointer::wrap(ptr.as_ptr()));
+        }
+    }
+}
+
+/// Host-visible GPU memory via gpu-allocator.
+#[cfg(feature = "vulkan")]
+pub mod vulkan {
+    use super::*;
+    use ash::vk;
+    use gpu_allocator::vulkan::{
+        Allocation, AllocationCreateDesc, AllocationScheme, Allocator as VkAllocator,
+        AllocatorCreateDesc,
+    };
+    use gpu_allocator::MemoryLocation;
+    use parking_lot::Mutex;
+    use std::collections::HashMap;
+    use std::sync::Arc;
+
+    /// Wraps a gpu-allocator instance. Clone is cheap: clones share the same
+    /// allocator through an `Arc`.
+    #[derive(Clone)]
+    pub struct VulkanAlloc {
+        inner: Arc<Mutex<VulkanInner>>,
+    }
+
+    /// The allocator plus its live allocations, keyed by mapped address so
+    /// `deallocate` can find the `Allocation` belonging to a raw pointer.
+    struct VulkanInner {
+        allocator: VkAllocator,
+        allocations: HashMap<usize, Allocation>,
+    }
+
+    impl VulkanAlloc {
+        /// Creates the underlying gpu-allocator for `device`.
+        ///
+        /// # Panics
+        ///
+        /// Panics if gpu-allocator cannot be created.
+        pub fn new(
+            instance: &ash::Instance,
+            device: ash::Device,
+            physical_device: vk::PhysicalDevice,
+        ) -> Self {
+            let allocator = VkAllocator::new(&AllocatorCreateDesc {
+                instance: instance.clone(),
+                device,
+                physical_device,
+                debug_settings: Default::default(),
+                buffer_device_address: false,
+                allocation_sizes: Default::default(),
+            })
+            .expect("Failed to create Vulkan allocator");
+
+            Self {
+                inner: Arc::new(Mutex::new(VulkanInner {
+                    allocator,
+                    allocations: HashMap::new(),
+                })),
+            }
+        }
+    }
+
+    // SAFETY: NOTE(review) - relies on gpu-allocator keeping a mapped block
+    // valid until it is freed, and on every block handed out being recorded
+    // in `allocations`; confirm against the gpu-allocator documentation.
+    unsafe impl Allocator for VulkanAlloc {
+        fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
+            if layout.size() == 0 {
+                let ptr = NonNull::new(layout.align() as *mut u8).ok_or(AllocError)?;
+                return Ok(NonNull::slice_from_raw_parts(ptr, 0));
+            }
+
+            let mut inner = self.inner.lock();
+
+            let allocation = inner
+                .allocator
+                .allocate(&AllocationCreateDesc {
+                    name: "gvec_alloc",
+                    requirements: vk::MemoryRequirements {
+                        size: layout.size() as u64,
+                        alignment: layout.align() as u64,
+                        memory_type_bits: u32::MAX,
+                    },
+                    location: MemoryLocation::CpuToGpu,
+                    linear: true,
+                    allocation_scheme: AllocationScheme::GpuAllocatorManaged,
+                })
+                .map_err(|_| AllocError)?;
+
+            // A block without a host mapping cannot back a `GVec`; give it
+            // back to the allocator instead of dropping (and leaking) it.
+            let Some(mapped) = allocation.mapped_ptr() else {
+                let _ = inner.allocator.free(allocation);
+                return Err(AllocError);
+            };
+
+            let nn = mapped.cast::<u8>();
+            inner.allocations.insert(nn.as_ptr() as usize, allocation);
+            Ok(NonNull::slice_from_raw_parts(nn, layout.size()))
+        }
+
+        unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
+            if layout.size() == 0 {
+                return;
+            }
+            let mut inner = self.inner.lock();
+            // Pointers that were never recorded are silently ignored.
+            if let Some(allocation) = inner.allocations.remove(&(ptr.as_ptr() as usize)) {
+                inner
+                    .allocator
+                    .free(allocation)
+                    .expect("Vulkan free failed");
+            }
+        }
+    }
+}
+
+/// Allocator used by `GVec` and `GBox` when the `cuda` feature is enabled.
+#[cfg(feature = "cuda")]
+pub type GpuAlloc = cuda::CudaAlloc;
+
+/// Allocator used by `GVec` and `GBox` when only `vulkan` is enabled.
+#[cfg(all(feature = "vulkan", not(feature = "cuda")))]
+pub type GpuAlloc = vulkan::VulkanAlloc;
+
+/// Allocator used by `GVec` and `GBox` when no GPU feature is enabled.
+#[cfg(not(any(feature = "cuda", feature = "vulkan")))]
+pub type GpuAlloc = SystemAlloc;
+
+/// `Vec` allocated through [`GpuAlloc`].
+pub type GVec<T> = alloc::vec::Vec<T, GpuAlloc>;
+/// `Box` allocated through [`GpuAlloc`].
+pub type GBox<T> = alloc::boxed::Box<T, GpuAlloc>;
+
+// NOTE(review): the helpers below call `GpuAlloc::default()`, but no `Default`
+// impl for `VulkanAlloc` is visible here, so the vulkan-only configuration
+// looks like it cannot compile as written. Confirm and decide how the
+// allocator instance should be supplied.
+
+/// Creates an empty [`GVec`].
+pub fn gvec<T>() -> GVec<T> {
+    Vec::new_in(GpuAlloc::default())
+}
+
+/// Creates an empty [`GVec`] with room for at least `cap` elements.
+pub fn gvec_with_capacity<T>(cap: usize) -> GVec<T> {
+    Vec::with_capacity_in(cap, GpuAlloc::default())
+}
+
+/// Clones every element of `slice` into a new [`GVec`].
+pub fn gvec_from_slice<T: Clone>(slice: &[T]) -> GVec<T> {
+    let mut v = gvec_with_capacity(slice.len());
+    v.extend_from_slice(slice);
+    v
+}
+
+/// Moves `value` into a [`GBox`].
+pub fn gbox<T>(value: T) -> GBox<T> {
+    Box::new_in(value, GpuAlloc::default())
+}
diff --git a/src/utils/upload.rs b/src/utils/upload.rs
new file mode 100644
index 0000000..40b3555
--- /dev/null
+++ b/src/utils/upload.rs
@@ -0,0 +1,188 @@
+use crate::core::image::Image;
+use crate::spectra::DenselySampledSpectrumBuffer;
+use crate::Arena;
+use shared::core::color::RGBToSpectrumTable;
+use shared::core::image::DeviceImage;
+use shared::core::light::Light;
+use shared::core::material::Material;
+use shared::core::shape::Shape;
+use shared::core::spectrum::Spectrum;
+use shared::spectra::{DenselySampledSpectrum, DeviceStandardColorSpaces, RGBColorSpace};
+use shared::Ptr;
+use std::slice::from_raw_parts;
+
+/// Conversion of a host-side value into its device-side representation.
+pub trait DeviceRepr {
+    /// The `#[repr(C)] Copy` device-side struct.
+    type Target: Copy;
+
+    /// Upload into the arena and return the device struct by value.
+    /// Use this when embedding the result inline in another device struct.
+    fn upload_value(&self, arena: &Arena) -> Self::Target;
+
+    /// Upload into the arena and return a Ptr to the device struct.
+    /// This is the common entry point — allocates the Target in the arena.
+    fn upload(&self, arena: &Arena) -> Ptr<Self::Target> {
+        let value = self.upload_value(arena);
+        arena.alloc(value)
+    }
+}
+
+/// `None` uploads as a null `Ptr`; `Some` defers to the wrapped value.
+impl<T: DeviceRepr> DeviceRepr for Option<T> {
+    type Target = T::Target;
+
+    /// # Panics
+    ///
+    /// Panics on `None`, which has no by-value device representation.
+    fn upload_value(&self, arena: &Arena) -> Self::Target {
+        match self {
+            Some(val) => val.upload_value(arena),
+            None => panic!("Cannot upload_value on None — use upload() which returns Ptr::null()"),
+        }
+    }
+
+    fn upload(&self, arena: &Arena) -> Ptr<Self::Target> {
+        match self {
+            Some(val) => val.upload(arena),
+            None => Ptr::null(),
+        }
+    }
+}
+
+/// Uploads the value behind the `Arc`.
+impl<T: DeviceRepr> DeviceRepr for std::sync::Arc<T> {
+    type Target = T::Target;
+
+    fn upload_value(&self, arena: &Arena) -> Self::Target {
+        (**self).upload_value(arena)
+    }
+
+    fn upload(&self, arena: &Arena) -> Ptr<Self::Target> {
+        (**self).upload(arena)
+    }
+}
+
+/// Uploads the value behind the `Box`.
+impl<T: DeviceRepr> DeviceRepr for Box<T> {
+    type Target = T::Target;
+
+    fn upload_value(&self, arena: &Arena) -> Self::Target {
+        (**self).upload_value(arena)
+    }
+
+    fn upload(&self, arena: &Arena) -> Ptr<Self::Target> {
+        (**self).upload(arena)
+    }
+}
+
+/// `Shape` is its own device representation; the arena is not used.
+impl DeviceRepr for Shape {
+    type Target = Shape;
+    fn upload_value(&self, _arena: &Arena) -> Shape {
+        *self
+    }
+}
+
+/// `Light` is its own device representation; the arena is not used.
+impl DeviceRepr for Light {
+    type Target = Light;
+    fn upload_value(&self, _arena: &Arena) -> Light {
+        *self
+    }
+}
+
+/// `Spectrum` is its own device representation; the arena is not used.
+impl DeviceRepr for Spectrum {
+    type Target = Spectrum;
+    fn upload_value(&self, _arena: &Arena) -> Spectrum {
+        *self
+    }
+}
+
+/// `Material` is its own device representation; the arena is not used.
+impl DeviceRepr for Material {
+    type Target = Material;
+    fn upload_value(&self, _arena: &Arena) -> Material {
+        *self
+    }
+}
+
+/// Returns a copy of the image's `DeviceImage`; the arena is not used.
+impl DeviceRepr for Image {
+    type Target = DeviceImage;
+    fn upload_value(&self, _arena: &Arena) -> DeviceImage {
+        *self.device()
+    }
+}
+
+/// Returns the buffer's device-side spectrum; the arena is not used.
+impl DeviceRepr for DenselySampledSpectrumBuffer {
+    type Target = DenselySampledSpectrum;
+    fn upload_value(&self, _arena: &Arena) -> DenselySampledSpectrum {
+        self.device()
+    }
+}
+
+/// Copies the node and coefficient arrays into the arena.
+impl DeviceRepr for RGBToSpectrumTable {
+    type Target = RGBToSpectrumTable;
+
+    fn upload_value(&self, arena: &Arena) -> RGBToSpectrumTable {
+        let n_nodes = self.n_nodes as usize;
+
+        // SAFETY: NOTE(review) - assumes `z_nodes` points at `n_nodes`
+        // readable, initialised elements (static or previously uploaded data).
+        let z_slice = unsafe { from_raw_parts(self.z_nodes.as_raw(), n_nodes) };
+        let (z_ptr, _) = arena.alloc_slice(z_slice);
+
+        // NOTE(review): a table laid out as [3][n][n][n][3] scalars would need
+        // 9 * n^3 elements; 3 * n^3 is only right if each `coeffs` element
+        // holds one coefficient triple. Confirm the element type.
+        let n_coeffs = 3 * n_nodes.pow(3);
+        // SAFETY: NOTE(review) - assumes `coeffs` points at `n_coeffs`
+        // readable, initialised elements.
+        let coeffs_slice = unsafe { from_raw_parts(self.coeffs.as_raw(), n_coeffs) };
+        let (c_ptr, _) = arena.alloc_slice(coeffs_slice);
+
+        RGBToSpectrumTable {
+            z_nodes: z_ptr,
+            coeffs: c_ptr,
+            n_nodes: self.n_nodes,
+        }
+    }
+}
+
+/// Uploads the spectrum table and copies every other field unchanged.
+impl DeviceRepr for RGBColorSpace {
+    type Target = RGBColorSpace;
+
+    fn upload_value(&self, arena: &Arena) -> RGBColorSpace {
+        let table_ptr = self.rgb_to_spectrum_table.upload(arena);
+
+        RGBColorSpace {
+            r: self.r,
+            g: self.g,
+            b: self.b,
+            w: self.w,
+            illuminant: self.illuminant.clone(),
+            rgb_to_spectrum_table: table_ptr,
+            xyz_from_rgb: self.xyz_from_rgb,
+            rgb_from_xyz: self.rgb_from_xyz,
+        }
+    }
+}
+
+/// Uploads each of the four colour spaces into the arena.
+impl DeviceRepr for DeviceStandardColorSpaces {
+    type Target = DeviceStandardColorSpaces;
+
+    fn upload_value(&self, arena: &Arena) -> DeviceStandardColorSpaces {
+        DeviceStandardColorSpaces {
+            srgb: self.srgb.upload(arena),
+            dci_p3: self.dci_p3.upload(arena),
+            rec2020: self.rec2020.upload(arena),
+            aces2065_1: self.aces2065_1.upload(arena),
+        }
+    }
+}