Trying out the allocator API: use a single unified Vec and Ptr representation instead of duplicating every type and going through DeviceRepr

This commit is contained in:
Wito Wiala 2026-05-18 22:44:22 +01:00
parent dad7300a14
commit fa4692bfe6
3 changed files with 565 additions and 0 deletions

160
shared/src/shapes/mesh.rs Normal file
View file

@ -0,0 +1,160 @@
use crate::core::geometry::{Normal3f, Point2f, Point3f, Vector3f};
use crate::utils::sampling::DevicePiecewiseConstant2D;
use crate::utils::Transform;
use crate::{Float, Gvec};
/// Triangle mesh whose per-vertex data lives in `GVec` buffers.
///
/// `TriangleMesh::new` stores positions, normals and tangents after applying
/// the `render_from_object` transform; `uv`, `vertex_indices` and
/// `face_indices` are copied verbatim. `n`, `s`, `uv` and `face_indices` may
/// be empty when the caller supplies no such data.
///
/// The type is `Clone` but deliberately not `Copy`: it owns `GVec` buffers
/// (allocator-aware `Vec`s), and `Vec` never implements `Copy`, so deriving
/// `Copy` here cannot compile.
///
/// NOTE(review): `#[repr(C)]` fixes the field order, but the layout of the
/// `GVec` fields themselves is not a C layout — confirm how device code is
/// expected to read this struct.
#[repr(C)]
#[derive(Debug, Clone)]
pub struct TriangleMesh {
    /// Vertex positions (transformed by `render_from_object` in `new`).
    pub p: GVec<Point3f>,
    /// Per-vertex normals; empty when the mesh has none.
    pub n: GVec<Normal3f>,
    /// Per-vertex tangents; empty when the mesh has none.
    pub s: GVec<Vector3f>,
    /// Per-vertex texture coordinates; empty when the mesh has none.
    pub uv: GVec<Point2f>,
    /// Vertex indices, three consecutive entries per triangle.
    pub vertex_indices: GVec<i32>,
    /// Optional per-triangle face indices; empty when absent.
    pub face_indices: GVec<i32>,
    /// Number of triangles (`vertex_indices.len() / 3`).
    pub n_triangles: u32,
    /// Number of vertices (`p.len()`).
    pub n_vertices: u32,
    /// Orientation flag passed to `new`.
    pub reverse_orientation: bool,
    /// Result of `render_from_object.swaps_handedness()` at construction.
    pub transform_swaps_handedness: bool,
}
/// Bilinear patch mesh whose per-vertex data lives in `GVec` buffers.
///
/// `BilinearPatchMesh::new` stores positions and normals after applying the
/// `render_from_object` transform; `uv` and `vertex_indices` are copied
/// verbatim. `n` and `uv` may be empty when the caller supplies no such data.
///
/// The type is `Clone` but deliberately not `Copy`: it owns `GVec` buffers
/// (allocator-aware `Vec`s), and `Vec` never implements `Copy`, so deriving
/// `Copy` here cannot compile.
///
/// NOTE(review): the imports at the top of this file bring in
/// `DevicePiecewiseConstant2D`, while this struct names `PiecewiseConstant2D`
/// — confirm which type is intended for `image_distribution`.
#[repr(C)]
#[derive(Debug, Clone)]
pub struct BilinearPatchMesh {
    /// Vertex positions (transformed by `render_from_object` in `new`).
    pub p: GVec<Point3f>,
    /// Per-vertex normals; empty when the mesh has none.
    pub n: GVec<Normal3f>,
    /// Per-vertex texture coordinates; empty when the mesh has none.
    pub uv: GVec<Point2f>,
    /// Vertex indices, four consecutive entries per patch.
    pub vertex_indices: GVec<i32>,
    /// Number of patches (`vertex_indices.len() / 4`).
    pub n_patches: u32,
    /// Number of vertices (`p.len()`).
    pub n_vertices: u32,
    /// Orientation flag passed to `new`.
    pub reverse_orientation: bool,
    /// Result of `render_from_object.swaps_handedness()` at construction.
    pub transform_swaps_handedness: bool,
    /// Optional image-based sampling distribution, built via `Ptr::from` in `new`.
    pub image_distribution: Ptr<PiecewiseConstant2D>,
}
// Manual thread-safety assertions for both mesh types. The compiler does not
// check these: they promise that moving a mesh to another thread (`Send`) and
// sharing `&Mesh` between threads (`Sync`) is sound.
//
// NOTE(review): presumably required because `Ptr` and/or the `GVec` allocator
// are not automatically `Send`/`Sync`. Confirm that nothing reachable from
// these structs is mutated through a shared reference, and drop the impls for
// any type that already gets `Send`/`Sync` automatically.
unsafe impl Send for TriangleMesh {}
unsafe impl Sync for TriangleMesh {}
unsafe impl Send for BilinearPatchMesh {}
unsafe impl Sync for BilinearPatchMesh {}
impl TriangleMesh {
pub fn new(
render_from_object: &Transform,
reverse_orientation: bool,
vertex_indices: &[i32],
p: &[Point3f],
n: &[Normal3f],
s: &[Vector3f],
uv: &[Point2f],
face_indices: &[i32],
) -> Self {
let n_triangles = (vertex_indices.len() / 3) as u32;
let n_vertices = p.len() as u32;
let mut p_gvec = gvec_with_capacity(p.len());
for pt in p {
p_gvec.push(render_from_object.apply_to_point(*pt));
}
let mut n_gvec = gvec_with_capacity(n.len());
if !n.is_empty() {
assert_eq!(n_vertices as usize, n.len(), "Normal count mismatch");
for nn in n {
let mut transformed = render_from_object.apply_to_normal(*nn);
if reverse_orientation {
transformed = -transformed;
}
n_gvec.push(transformed);
}
}
let mut s_gvec = gvec_with_capacity(s.len());
if !s.is_empty() {
assert_eq!(n_vertices as usize, s.len(), "Tangent count mismatch");
for ss in s {
s_gvec.push(render_from_object.apply_to_vector(*ss));
}
}
assert!(
uv.is_empty() || uv.len() == n_vertices as usize,
"UV count mismatch"
);
assert!(
face_indices.is_empty() || face_indices.len() == n_triangles as usize,
"Face index count mismatch"
);
Self {
vertex_indices: gvec_from_slice(vertex_indices),
p: p_gvec,
n: n_gvec,
s: s_gvec,
uv: gvec_from_slice(uv),
face_indices: gvec_from_slice(face_indices),
n_triangles,
n_vertices,
reverse_orientation,
transform_swaps_handedness: render_from_object.swaps_handedness(),
}
}
pub fn positions(&self) -> &[Point3f] {
&self.p
}
pub fn indices(&self) -> &[i32] {
&self.vertex_indices
}
pub fn normals(&self) -> &[Normal3f] {
&self.n
}
pub fn uvs(&self) -> &[Point2f] {
&self.uv
}
}
impl BilinearPatchMesh {
pub fn new(
render_from_object: &Transform,
reverse_orientation: bool,
vertex_indices: &[i32],
p: &[Point3f],
n: &[Normal3f],
uv: &[Point2f],
image_distribution: Option<PiecewiseConstant2D>,
) -> Self {
let n_patches = (vertex_indices.len() / 4) as u32;
let n_vertices = p.len() as u32;
let mut p_gvec = gvec_with_capacity(p.len());
for pt in p {
p_gvec.push(render_from_object.apply_to_point(*pt));
}
let mut n_gvec = gvec_with_capacity(n.len());
if !n.is_empty() {
assert_eq!(n_vertices as usize, n.len());
for nn in n {
let mut transformed = render_from_object.apply_to_normal(*nn);
if reverse_orientation {
transformed = -transformed;
}
n_gvec.push(transformed);
}
}
assert!(uv.is_empty() || uv.len() == n_vertices as usize);
Self {
vertex_indices: gvec_from_slice(vertex_indices),
p: p_gvec,
n: n_gvec,
uv: gvec_from_slice(uv),
image_distribution: Ptr::from(image_distribution),
n_patches,
n_vertices,
reverse_orientation,
transform_swaps_handedness: render_from_object.swaps_handedness(),
}
}
}

238
shared/src/utils/alloc.rs Normal file
View file

@ -0,0 +1,238 @@
// NOTE(review): `#![feature(...)]` is a crate-level attribute. This file is
// listed as `shared/src/utils/alloc.rs`, i.e. a submodule rather than the
// crate root, so the gate most likely has to be declared in the crate root
// (`lib.rs`) for `allocator_api` to be enabled here — confirm.
#![feature(allocator_api)]
extern crate alloc;
// `Global` is the default heap allocator that `SystemAlloc` forwards to.
use alloc::alloc::Global;
use core::alloc::{AllocError, Allocator, Layout};
use core::ptr::NonNull;
// CPU fallback, delegates to Global
/// Zero-sized allocator handle that forwards every request to [`Global`].
///
/// Used as the `GpuAlloc` backend when neither the `cuda` nor the `vulkan`
/// feature is enabled.
#[derive(Debug, Clone, Copy, Default)]
pub struct SystemAlloc;

// SAFETY: every method forwards its arguments unchanged to `Global`, so this
// type upholds the `Allocator` contract exactly as `Global` does.
unsafe impl Allocator for SystemAlloc {
    /// Allocates a block for `layout` from the global heap.
    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        Allocator::allocate(&Global, layout)
    }

    /// Returns a block previously obtained from this allocator.
    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
        // SAFETY: the caller's obligations for `deallocate` are passed
        // straight through to `Global`, which performed the allocation.
        unsafe { Allocator::deallocate(&Global, ptr, layout) }
    }

    /// Grows a block in place when possible, otherwise reallocates.
    unsafe fn grow(
        &self,
        ptr: NonNull<u8>,
        old_layout: Layout,
        new_layout: Layout,
    ) -> Result<NonNull<[u8]>, AllocError> {
        // SAFETY: same contract as `Allocator::grow`, forwarded verbatim.
        unsafe { Allocator::grow(&Global, ptr, old_layout, new_layout) }
    }

    /// Shrinks a block in place when possible, otherwise reallocates.
    unsafe fn shrink(
        &self,
        ptr: NonNull<u8>,
        old_layout: Layout,
        new_layout: Layout,
    ) -> Result<NonNull<[u8]>, AllocError> {
        // SAFETY: same contract as `Allocator::shrink`, forwarded verbatim.
        unsafe { Allocator::shrink(&Global, ptr, old_layout, new_layout) }
    }
}
// Unified memory via cudaMallocManaged
//
// This module used to be defined twice under the same `cfg`, which is a
// duplicate-definition error as soon as the `cuda` feature is enabled. It is
// now defined exactly once.
#[cfg(feature = "cuda")]
pub mod cuda {
    use super::*;
    use cust::memory::{cuda_free_unified, cuda_malloc_unified, UnifiedPointer};

    /// Alignment CUDA guarantees for pointers returned by its allocation
    /// routines. The allocation call takes no alignment argument, so any
    /// request stricter than this cannot be honoured.
    const CUDA_MIN_ALIGNMENT: usize = 256;

    /// Zero-sized allocator handle backed by CUDA unified (managed) memory.
    #[derive(Debug, Clone, Copy, Default)]
    pub struct CudaAlloc;

    // SAFETY: non-empty blocks come from `cuda_malloc_unified` and are
    // released with `cuda_free_unified`; requests whose alignment exceeds the
    // CUDA guarantee are rejected rather than returned misaligned.
    unsafe impl Allocator for CudaAlloc {
        /// Allocates `layout.size()` bytes of unified memory.
        ///
        /// Returns `AllocError` when CUDA reports a failure, returns a null
        /// pointer, or when `layout.align()` exceeds `CUDA_MIN_ALIGNMENT`.
        fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
            if layout.size() == 0 {
                // Zero-sized allocations: return a dangling aligned pointer
                // with zero length, which is valid for ZSTs.
                let ptr = NonNull::new(layout.align() as *mut u8).ok_or(AllocError)?;
                return Ok(NonNull::slice_from_raw_parts(ptr, 0));
            }
            if layout.align() > CUDA_MIN_ALIGNMENT {
                return Err(AllocError);
            }
            // SAFETY: `cuda_malloc_unified` is unsafe because the returned
            // memory is uninitialised; we only hand out the raw block, and
            // the `Allocator` contract makes the caller initialise it before
            // reading.
            let mut unified =
                unsafe { cuda_malloc_unified::<u8>(layout.size()) }.map_err(|_| AllocError)?;
            let raw = unified.as_raw_mut();
            // Ownership of the block moves to the raw pointer returned below;
            // it is released in `deallocate`, not through the wrapper.
            // NOTE(review): if `UnifiedPointer` is a plain `Copy` handle with
            // no `Drop`, this `forget` is a no-op and can be removed.
            core::mem::forget(unified);
            let block = NonNull::new(raw).ok_or(AllocError)?;
            Ok(NonNull::slice_from_raw_parts(block, layout.size()))
        }

        /// Frees a block previously returned by `allocate`.
        ///
        /// Zero-sized layouts were never backed by CUDA memory and are
        /// ignored. Errors from the CUDA free call are discarded because this
        /// method cannot report them.
        unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
            if layout.size() == 0 {
                return;
            }
            // SAFETY: per the `Allocator` contract `ptr` was returned by
            // `allocate` on this allocator with a non-zero size, so it
            // originates from `cuda_malloc_unified` and has not been freed.
            let _ = unsafe { cuda_free_unified(UnifiedPointer::wrap(ptr.as_ptr())) };
        }
    }
}
// Host-visible GPU memory via gpu-allocator
#[cfg(feature = "vulkan")]
pub mod vulkan {
    use super::*;
    use ash::vk;
    use gpu_allocator::vulkan::{
        Allocation, AllocationCreateDesc, AllocationScheme, Allocator as VkAllocator,
        AllocatorCreateDesc,
    };
    use gpu_allocator::MemoryLocation;
    use parking_lot::Mutex;
    use std::collections::HashMap;
    use std::sync::Arc;

    /// Wraps a gpu-allocator instance. Clone is cheap: clones share the same
    /// underlying allocator through an `Arc`.
    #[derive(Clone)]
    pub struct VulkanAlloc {
        inner: Arc<Mutex<VulkanInner>>,
    }

    /// State guarded by the mutex in `VulkanAlloc`.
    struct VulkanInner {
        allocator: VkAllocator,
        /// Live allocations keyed by their mapped host address, so that
        /// `deallocate` can find the `Allocation` belonging to a raw pointer.
        allocations: HashMap<usize, Allocation>,
    }

    impl VulkanAlloc {
        /// Creates an allocator for the given Vulkan instance and device.
        ///
        /// # Panics
        ///
        /// Panics when gpu-allocator fails to initialise.
        pub fn new(
            instance: &ash::Instance,
            device: ash::Device,
            physical_device: vk::PhysicalDevice,
        ) -> Self {
            let allocator = VkAllocator::new(&AllocatorCreateDesc {
                instance: instance.clone(),
                device: device.clone(),
                physical_device,
                debug_settings: Default::default(),
                buffer_device_address: false,
                allocation_sizes: Default::default(),
            })
            .expect("Failed to create Vulkan allocator");
            let state = VulkanInner {
                allocator,
                allocations: HashMap::new(),
            };
            Self {
                inner: Arc::new(Mutex::new(state)),
            }
        }
    }

    // SAFETY: every non-empty block is a host-mapped gpu-allocator allocation
    // that stays registered in `allocations` until `deallocate` removes it.
    unsafe impl Allocator for VulkanAlloc {
        /// Allocates a host-visible (`CpuToGpu`) block satisfying `layout`.
        ///
        /// Returns `AllocError` when gpu-allocator fails or when the
        /// allocation has no host mapping.
        fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
            let size = layout.size();
            if size == 0 {
                // Zero-sized request: dangling but aligned pointer, length 0.
                let ptr = NonNull::new(layout.align() as *mut u8).ok_or(AllocError)?;
                return Ok(NonNull::slice_from_raw_parts(ptr, 0));
            }
            let request = AllocationCreateDesc {
                name: "gvec_alloc",
                requirements: vk::MemoryRequirements {
                    size: size as u64,
                    alignment: layout.align() as u64,
                    memory_type_bits: u32::MAX,
                },
                location: MemoryLocation::CpuToGpu,
                linear: true,
                allocation_scheme: AllocationScheme::GpuAllocatorManaged,
            };
            let mut state = self.inner.lock();
            let allocation = state
                .allocator
                .allocate(&request)
                .map_err(|_| AllocError)?;
            let Some(mapped) = allocation.mapped_ptr() else {
                // No host mapping: hand the allocation back instead of just
                // dropping it, otherwise it is never returned to the
                // allocator. A failure to free cannot be reported beyond the
                // `AllocError` we already return.
                let _ = state.allocator.free(allocation);
                return Err(AllocError);
            };
            // `mapped` is already `NonNull`, so no further null check is needed.
            let base = mapped.cast::<u8>();
            state
                .allocations
                .insert(base.as_ptr() as usize, allocation);
            Ok(NonNull::slice_from_raw_parts(base, size))
        }

        /// Frees a block previously returned by `allocate`.
        ///
        /// Zero-sized layouts and unknown pointers are ignored.
        ///
        /// # Panics
        ///
        /// Panics when gpu-allocator reports an error while freeing.
        unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
            if layout.size() == 0 {
                return;
            }
            let mut state = self.inner.lock();
            if let Some(allocation) = state.allocations.remove(&(ptr.as_ptr() as usize)) {
                state
                    .allocator
                    .free(allocation)
                    .expect("Vulkan free failed");
            }
        }
    }
}
// Backend selection for `GpuAlloc`: CUDA takes priority when both GPU
// features are enabled, Vulkan is used when only `vulkan` is enabled, and the
// CPU-side `SystemAlloc` is the fallback when neither is.
//
// NOTE(review): the `gvec*` / `gbox` helpers below build the allocator with
// `GpuAlloc::default()`. `VulkanAlloc` derives only `Clone` and is created
// through `VulkanAlloc::new(instance, device, physical_device)`, so the
// Vulkan-only configuration needs some other way to obtain an allocator
// instance — confirm.
#[cfg(feature = "cuda")]
pub type GpuAlloc = cuda::CudaAlloc;
#[cfg(all(feature = "vulkan", not(feature = "cuda")))]
pub type GpuAlloc = vulkan::VulkanAlloc;
#[cfg(not(any(feature = "cuda", feature = "vulkan")))]
pub type GpuAlloc = SystemAlloc;
/// `Vec` whose storage comes from the selected `GpuAlloc` backend.
pub type GVec<T> = alloc::vec::Vec<T, GpuAlloc>;
/// `Box` whose storage comes from the selected `GpuAlloc` backend.
pub type GBox<T> = alloc::boxed::Box<T, GpuAlloc>;
pub fn gvec<T>() -> GVec<T> {
Vec::new_in(GpuAlloc::default())
}
pub fn gvec_with_capacity<T>(cap: usize) -> GVec<T> {
Vec::with_capacity_in(cap, GpuAlloc::default())
}
pub fn gvec_from_slice<T: Clone>(slice: &[T]) -> GVec<T> {
let mut v = gvec_with_capacity(slice.len());
v.extend_from_slice(slice);
v
}
pub fn gbox<T>(value: T) -> GBox<T> {
Box::new_in(value, GpuAlloc::default())
}

167
src/utils/upload.rs Normal file
View file

@ -0,0 +1,167 @@
use crate::core::image::Image;
use crate::spectra::DenselySampledSpectrumBuffer;
use crate::Arena;
use shared::core::color::RGBToSpectrumTable;
use shared::core::image::DeviceImage;
use shared::core::light::Light;
use shared::core::material::Material;
use shared::core::shape::Shape;
use shared::core::spectrum::Spectrum;
use shared::spectra::{DenselySampledSpectrum, DeviceStandardColorSpaces, RGBColorSpace};
use shared::Ptr;
use std::slice::from_raw_parts;
/// Conversion from a host-side value to its device-side representation.
pub trait DeviceRepr {
    /// The `#[repr(C)] Copy` device-side struct.
    type Target: Copy;

    /// Builds the device struct and returns it by value, uploading any data
    /// it needs into `arena`.
    ///
    /// Use this when the result is embedded inline in another device struct.
    fn upload_value(&self, arena: &Arena) -> Self::Target;

    /// Builds the device struct, stores it in `arena`, and returns a `Ptr`
    /// to the stored copy.
    ///
    /// This is the common entry point; the default implementation simply
    /// places the result of [`upload_value`](Self::upload_value) in the arena.
    fn upload(&self, arena: &Arena) -> Ptr<Self::Target> {
        arena.alloc(self.upload_value(arena))
    }
}
/// Optional values upload as a nullable `Ptr`: `Some` forwards to the inner
/// value, `None` becomes `Ptr::null()`.
impl<T: DeviceRepr> DeviceRepr for Option<T> {
    type Target = T::Target;

    /// Forwards to the inner value.
    ///
    /// # Panics
    ///
    /// Panics on `None`, since there is no by-value representation of a
    /// missing target; call `upload` instead.
    fn upload_value(&self, arena: &Arena) -> Self::Target {
        let Some(inner) = self else {
            panic!("Cannot upload_value on None — use upload() which returns Ptr::null()");
        };
        inner.upload_value(arena)
    }

    /// Uploads the inner value, or returns a null `Ptr` for `None`.
    fn upload(&self, arena: &Arena) -> Ptr<Self::Target> {
        if let Some(inner) = self {
            inner.upload(arena)
        } else {
            Ptr::null()
        }
    }
}
/// `Arc<T>` uploads exactly like the `T` it points to.
impl<T: DeviceRepr> DeviceRepr for std::sync::Arc<T> {
    type Target = T::Target;

    /// Forwards to the shared inner value.
    fn upload_value(&self, arena: &Arena) -> Self::Target {
        let inner: &T = self;
        inner.upload_value(arena)
    }

    /// Forwards to the shared inner value's `upload`.
    fn upload(&self, arena: &Arena) -> Ptr<Self::Target> {
        let inner: &T = self;
        inner.upload(arena)
    }
}
/// `Box<T>` uploads exactly like the `T` it owns.
impl<T: DeviceRepr> DeviceRepr for Box<T> {
    type Target = T::Target;

    /// Forwards to the boxed value.
    fn upload_value(&self, arena: &Arena) -> Self::Target {
        let inner: &T = self;
        inner.upload_value(arena)
    }

    /// Forwards to the boxed value's `upload`.
    fn upload(&self, arena: &Arena) -> Ptr<Self::Target> {
        let inner: &T = self;
        inner.upload(arena)
    }
}
/// `Shape` is its own device representation.
impl DeviceRepr for Shape {
    type Target = Shape;

    /// Returns a clone of `self`; the arena is not used.
    fn upload_value(&self, _arena: &Arena) -> Shape {
        Clone::clone(self)
    }
}
/// `Light` is its own device representation.
impl DeviceRepr for Light {
    type Target = Light;

    /// Returns a clone of `self`; the arena is not used.
    fn upload_value(&self, _arena: &Arena) -> Light {
        Clone::clone(self)
    }
}
/// `Spectrum` is its own device representation.
impl DeviceRepr for Spectrum {
    type Target = Spectrum;

    /// Returns a clone of `self`; the arena is not used.
    fn upload_value(&self, _arena: &Arena) -> Spectrum {
        Clone::clone(self)
    }
}
/// `Material` is its own device representation.
impl DeviceRepr for Material {
    type Target = Material;

    /// Returns a clone of `self`; the arena is not used.
    fn upload_value(&self, _arena: &Arena) -> Material {
        Clone::clone(self)
    }
}
/// A host `Image` uploads as the `DeviceImage` it already carries.
impl DeviceRepr for Image {
    type Target = DeviceImage;

    /// Copies out the value behind `self.device()`; the arena is not used.
    fn upload_value(&self, _arena: &Arena) -> DeviceImage {
        let device_image: DeviceImage = *self.device();
        device_image
    }
}
/// A host spectrum buffer uploads as the `DenselySampledSpectrum` returned
/// by its `device()` accessor.
impl DeviceRepr for DenselySampledSpectrumBuffer {
    type Target = DenselySampledSpectrum;

    /// Returns `self.device()`; the arena is not used.
    fn upload_value(&self, _arena: &Arena) -> DenselySampledSpectrum {
        let device_spectrum: DenselySampledSpectrum = self.device();
        device_spectrum
    }
}
/// Uploads an RGB-to-spectrum table by copying both of its arrays into the
/// arena and returning a table that points at the copies.
impl DeviceRepr for RGBToSpectrumTable {
    type Target = RGBToSpectrumTable;

    /// Copies `n_nodes` entries of `z_nodes` and `3 * n_nodes^3` entries of
    /// `coeffs` into `arena`.
    ///
    /// A null source pointer or a zero element count is treated as an empty
    /// array instead of being passed to `from_raw_parts`, which requires a
    /// non-null pointer even for zero-length slices.
    ///
    /// NOTE(review): the `3 * n_nodes^3` count assumes a particular element
    /// type for `coeffs` (e.g. one entry per coefficient triple) — confirm it
    /// matches how the table is laid out.
    fn upload_value(&self, arena: &Arena) -> RGBToSpectrumTable {
        /// Views `len` elements starting at `ptr`, or an empty slice when
        /// there is nothing to read.
        ///
        /// # Safety
        ///
        /// When `ptr` is non-null and `len > 0`, `ptr` must be aligned and
        /// valid for reads of `len` initialised elements for as long as the
        /// returned slice is used.
        unsafe fn view_or_empty<'a, T>(ptr: *const T, len: usize) -> &'a [T] {
            if ptr.is_null() || len == 0 {
                &[]
            } else {
                // SAFETY: non-null and non-empty; validity is the caller's
                // obligation as documented above.
                unsafe { from_raw_parts(ptr, len) }
            }
        }

        let n_nodes = self.n_nodes as usize;
        let n_coeffs = 3 * n_nodes.pow(3);

        // SAFETY: these Ptrs point into static or previously-uploaded data
        // holding `n_nodes` node positions; the contents are copied into the
        // arena before the borrow ends.
        let z_slice = unsafe { view_or_empty(self.z_nodes.as_raw(), n_nodes) };
        let (z_ptr, _) = arena.alloc_slice(z_slice);

        // SAFETY: as above, with `n_coeffs` coefficient entries.
        let coeffs_slice = unsafe { view_or_empty(self.coeffs.as_raw(), n_coeffs) };
        let (c_ptr, _) = arena.alloc_slice(coeffs_slice);

        RGBToSpectrumTable {
            z_nodes: z_ptr,
            coeffs: c_ptr,
            n_nodes: self.n_nodes,
        }
    }
}
/// Uploads a colour space: the spectrum table is uploaded into the arena,
/// every other field is copied (or cloned) as-is.
impl DeviceRepr for RGBColorSpace {
    type Target = RGBColorSpace;

    /// Returns a colour space whose `rgb_to_spectrum_table` points at the
    /// arena copy of the table.
    fn upload_value(&self, arena: &Arena) -> RGBColorSpace {
        let RGBColorSpace {
            r,
            g,
            b,
            w,
            illuminant,
            rgb_to_spectrum_table,
            xyz_from_rgb,
            rgb_from_xyz,
        } = self;

        // The table is the only field that needs new storage.
        let uploaded_table = rgb_to_spectrum_table.upload(arena);

        Self {
            r: *r,
            g: *g,
            b: *b,
            w: *w,
            illuminant: illuminant.clone(),
            rgb_to_spectrum_table: uploaded_table,
            xyz_from_rgb: *xyz_from_rgb,
            rgb_from_xyz: *rgb_from_xyz,
        }
    }
}
/// Uploads the set of standard colour spaces by uploading each member.
impl DeviceRepr for DeviceStandardColorSpaces {
    type Target = DeviceStandardColorSpaces;

    /// Uploads sRGB, DCI-P3, Rec.2020 and ACES2065-1 (in that order) and
    /// returns a set holding the resulting pointers.
    fn upload_value(&self, arena: &Arena) -> DeviceStandardColorSpaces {
        let srgb = self.srgb.upload(arena);
        let dci_p3 = self.dci_p3.upload(arena);
        let rec2020 = self.rec2020.upload(arena);
        let aces2065_1 = self.aces2065_1.upload(arena);
        DeviceStandardColorSpaces {
            srgb,
            dci_p3,
            rec2020,
            aces2065_1,
        }
    }
}