// pbrt/shared/src/utils/alloc.rs

extern crate alloc;

use crate::utils::ptr::Ptr;
use alloc::alloc::Global;
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::alloc::{AllocError, Allocator, Layout};
use core::ptr::NonNull;

// CPU fallback: forwards every request to the standard `Global` allocator.
/// Stateless allocator handle that hands every request straight to [`Global`].
///
/// `GpuAlloc` resolves to this type when neither the `cuda` nor the `vulkan`
/// feature is enabled.
#[derive(Debug, Clone, Copy, Default)]
pub struct SystemAlloc;

// SAFETY: each method below passes its arguments unchanged to `Global`, so the
// blocks handed out are `Global`'s blocks and carry `Global`'s guarantees.
unsafe impl Allocator for SystemAlloc {
    /// Obtains a block fitting `layout` from [`Global`].
    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        Allocator::allocate(&Global, layout)
    }

    /// Hands the block at `ptr` back to [`Global`].
    ///
    /// # Safety
    ///
    /// Same contract as [`Allocator::deallocate`].
    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
        // SAFETY: the caller upholds the `Allocator::deallocate` contract, and
        // every block issued by `SystemAlloc` was produced by `Global`.
        unsafe { Allocator::deallocate(&Global, ptr, layout) }
    }

    /// Enlarges the block at `ptr` from `old_layout` to `new_layout` via [`Global`].
    ///
    /// # Safety
    ///
    /// Same contract as [`Allocator::grow`].
    unsafe fn grow(
        &self,
        ptr: NonNull<u8>,
        old_layout: Layout,
        new_layout: Layout,
    ) -> Result<NonNull<[u8]>, AllocError> {
        // SAFETY: the caller upholds the `Allocator::grow` contract, and the
        // block being resized was produced by `Global`.
        unsafe { Allocator::grow(&Global, ptr, old_layout, new_layout) }
    }

    /// Reduces the block at `ptr` from `old_layout` to `new_layout` via [`Global`].
    ///
    /// # Safety
    ///
    /// Same contract as [`Allocator::shrink`].
    unsafe fn shrink(
        &self,
        ptr: NonNull<u8>,
        old_layout: Layout,
        new_layout: Layout,
    ) -> Result<NonNull<[u8]>, AllocError> {
        // SAFETY: the caller upholds the `Allocator::shrink` contract, and the
        // block being resized was produced by `Global`.
        unsafe { Allocator::shrink(&Global, ptr, old_layout, new_layout) }
    }
}

// Unified memory via cudaMallocManaged
/// CUDA backend: serves allocations from unified memory through the `cust` crate.
#[cfg(feature = "cuda")]
pub mod cuda {
    use super::*;
    use cust::memory::{cuda_free_unified, cuda_malloc_unified, UnifiedPointer};

    /// Stateless allocator whose blocks come from `cuda_malloc_unified` and are
    /// released with `cuda_free_unified`.
    #[derive(Debug, Clone, Copy, Default)]
    pub struct CudaAlloc;

    // SAFETY: every non-zero-sized block returned by `allocate` is a live
    // unified-memory allocation of at least `layout.size()` bytes whose address
    // has been checked against `layout.align()`; it stays valid until it is
    // passed to `deallocate`, which is the only place it is freed.
    unsafe impl Allocator for CudaAlloc {
        /// Allocates `layout.size()` bytes of unified memory.
        ///
        /// Zero-sized requests never touch the driver; they yield a dangling,
        /// suitably aligned pointer with length zero.
        ///
        /// # Errors
        ///
        /// Returns [`AllocError`] when the driver call fails, when it yields a
        /// null pointer, or when the returned address does not satisfy
        /// `layout.align()` (in which case the block is freed again first).
        fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
            if layout.size() == 0 {
                // Zero-sized allocations: return a dangling aligned pointer
                // with zero length, which is valid for ZSTs.
                let ptr = NonNull::new(layout.align() as *mut u8).ok_or(AllocError)?;
                return Ok(NonNull::slice_from_raw_parts(ptr, 0));
            }

            // SAFETY: the size is non-zero, the memory is handed out as
            // uninitialized bytes, and it is released with `cuda_free_unified`
            // either below (on failure) or in `deallocate`.
            let mut ptr =
                unsafe { cuda_malloc_unified::<u8>(layout.size()) }.map_err(|_| AllocError)?;

            let raw = ptr.as_raw_mut();
            // NOTE(review): `UnifiedPointer` looks like a plain pointer wrapper
            // without a destructor, which would make this a no-op. It is kept so
            // ownership of `raw` cannot be released implicitly — confirm and remove.
            core::mem::forget(ptr);

            let nn = NonNull::new(raw).ok_or(AllocError)?;

            // The driver call takes no alignment argument, so the request is
            // verified after the fact instead of trusted. `align()` is always a
            // non-zero power of two, which makes the mask test exact.
            if (raw as usize) & (layout.align() - 1) != 0 {
                // SAFETY: `raw` was returned by `cuda_malloc_unified` just above
                // and has not been handed to anyone else.
                let _ = unsafe { cuda_free_unified(UnifiedPointer::wrap(raw)) };
                return Err(AllocError);
            }

            Ok(NonNull::slice_from_raw_parts(nn, layout.size()))
        }

        /// Releases a block previously returned by [`CudaAlloc::allocate`].
        ///
        /// Zero-sized layouts are ignored because no memory was ever acquired
        /// for them. A failure reported by the driver is discarded.
        ///
        /// # Safety
        ///
        /// Same contract as [`Allocator::deallocate`].
        unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
            if layout.size() == 0 {
                return;
            }
            // SAFETY: per the `Allocator::deallocate` contract `ptr` came from
            // `allocate` with a non-zero size, i.e. from `cuda_malloc_unified`,
            // and has not been freed yet.
            let _ = unsafe { cuda_free_unified(UnifiedPointer::wrap(ptr.as_ptr())) };
        }
    }
}

// Host-visible GPU memory via gpu-allocator

/// Vulkan backend: serves allocations from mapped memory managed by `gpu_allocator`.
#[cfg(feature = "vulkan")]
pub mod vulkan {
    use super::*;
    use ash::vk;
    use gpu_allocator::vulkan::{
        Allocation, AllocationCreateDesc, AllocationScheme, Allocator as VkAllocator,
        AllocatorCreateDesc,
    };
    use gpu_allocator::MemoryLocation;
    use parking_lot::Mutex;
    use std::collections::HashMap;
    use std::sync::Arc;

    /// Wraps a gpu-allocator instance.
    ///
    /// Cloning copies only the `Arc`, so all clones share one underlying
    /// allocator and one table of live allocations.
    ///
    /// NOTE(review): no `Default` impl is defined in this module, while
    /// `gvec()`, `gbox()` and friends call `GpuAlloc::default()`. A vulkan-only
    /// build needs that impl to exist somewhere — confirm.
    #[derive(Clone)]
    pub struct VulkanAlloc {
        inner: Arc<Mutex<VulkanInner>>,
    }

    /// State shared by all clones of a [`VulkanAlloc`], guarded by its mutex.
    struct VulkanInner {
        allocator: VkAllocator,
        /// Live allocations keyed by the mapped host address returned to the caller.
        allocations: HashMap<usize, Allocation>,
    }

    impl VulkanAlloc {
        /// Builds an allocator for `device` using default debug settings and
        /// allocation sizes, with buffer device addresses disabled.
        ///
        /// # Panics
        ///
        /// Panics if `gpu_allocator` fails to create the underlying allocator.
        pub fn new(
            instance: &ash::Instance,
            device: ash::Device,
            physical_device: vk::PhysicalDevice,
        ) -> Self {
            let allocator = VkAllocator::new(&AllocatorCreateDesc {
                instance: instance.clone(),
                // `device` is owned by this function, so it is moved rather than cloned.
                device,
                physical_device,
                debug_settings: Default::default(),
                buffer_device_address: false,
                allocation_sizes: Default::default(),
            })
            .expect("Failed to create Vulkan allocator");

            Self {
                inner: Arc::new(Mutex::new(VulkanInner {
                    allocator,
                    allocations: HashMap::new(),
                })),
            }
        }
    }

    // SAFETY: every non-zero-sized block returned by `allocate` is the mapped
    // pointer of an `Allocation` that is kept alive in `allocations` until the
    // matching `deallocate`, and its address has been checked against
    // `layout.align()`. Clones share the same state, so any clone can free a
    // block allocated through another.
    unsafe impl Allocator for VulkanAlloc {
        /// Allocates `layout.size()` bytes of `CpuToGpu` memory and returns its
        /// mapped host pointer.
        ///
        /// Zero-sized requests never touch the allocator; they yield a dangling,
        /// suitably aligned pointer with length zero.
        ///
        /// # Errors
        ///
        /// Returns [`AllocError`] when `gpu_allocator` rejects the request, when
        /// the resulting allocation has no mapped pointer, or when the mapped
        /// address does not satisfy `layout.align()`. In the latter two cases
        /// the allocation is handed back to `gpu_allocator` before returning.
        fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
            if layout.size() == 0 {
                let ptr = NonNull::new(layout.align() as *mut u8).ok_or(AllocError)?;
                return Ok(NonNull::slice_from_raw_parts(ptr, 0));
            }

            let mut guard = self.inner.lock();
            // Reborrow once so the allocator and the table are separate borrows.
            let inner = &mut *guard;

            let allocation = inner
                .allocator
                .allocate(&AllocationCreateDesc {
                    name: "gvec_alloc",
                    requirements: vk::MemoryRequirements {
                        size: layout.size() as u64,
                        alignment: layout.align() as u64,
                        memory_type_bits: u32::MAX,
                    },
                    location: MemoryLocation::CpuToGpu,
                    linear: true,
                    allocation_scheme: AllocationScheme::GpuAllocatorManaged,
                })
                .map_err(|_| AllocError)?;

            let mapped = allocation.mapped_ptr().map(|p| p.cast::<u8>());
            match mapped {
                // `align()` is always a non-zero power of two, so the mask test
                // is exact. The host address is checked explicitly because the
                // `Allocator` contract is about the pointer that is returned.
                Some(ptr) if (ptr.as_ptr() as usize) & (layout.align() - 1) == 0 => {
                    inner.allocations.insert(ptr.as_ptr() as usize, allocation);
                    Ok(NonNull::slice_from_raw_parts(ptr, layout.size()))
                }
                _ => {
                    // Unusable allocation: give it back instead of dropping it,
                    // otherwise the sub-allocation would stay reserved forever.
                    // The request already fails, so a free error is not reported.
                    let _ = inner.allocator.free(allocation);
                    Err(AllocError)
                }
            }
        }

        /// Releases a block previously returned by [`VulkanAlloc::allocate`].
        ///
        /// Zero-sized layouts are ignored, and so is a pointer that is not
        /// present in the table of live allocations.
        ///
        /// # Panics
        ///
        /// Panics if `gpu_allocator` reports an error while freeing.
        ///
        /// # Safety
        ///
        /// Same contract as [`Allocator::deallocate`].
        unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
            if layout.size() == 0 {
                return;
            }
            let mut guard = self.inner.lock();
            let inner = &mut *guard;
            let key = ptr.as_ptr() as usize;
            if let Some(allocation) = inner.allocations.remove(&key) {
                inner
                    .allocator
                    .free(allocation)
                    .expect("Vulkan free failed");
            }
        }
    }
}

#[cfg(feature = "cuda")]
pub type GpuAlloc = cuda::CudaAlloc;

#[cfg(all(feature = "vulkan", not(feature = "cuda")))]
pub type GpuAlloc = vulkan::VulkanAlloc;

#[cfg(not(any(feature = "cuda", feature = "vulkan")))]
pub type GpuAlloc = SystemAlloc;

pub type GVec<T> = alloc::vec::Vec<T, GpuAlloc>;
pub type GBox<T> = alloc::boxed::Box<T, GpuAlloc>;

pub fn gvec<T>() -> GVec<T> {
    Vec::new_in(GpuAlloc::default())
}

pub fn gvec_with_capacity<T>(cap: usize) -> GVec<T> {
    Vec::with_capacity_in(cap, GpuAlloc::default())
}

pub fn gvec_from_slice<T: Clone>(slice: &[T]) -> GVec<T> {
    let mut v = gvec_with_capacity(slice.len());
    v.extend_from_slice(slice);
    v
}

pub fn gbox<T>(value: T) -> GBox<T> {
    Box::new_in(value, GpuAlloc::default())
}

/// Boxes `val` with [`gbox`], leaks the box, and wraps the resulting
/// `&'static T` in a [`Ptr`].
///
/// This function never frees the allocation.
pub fn leak<T: 'static>(val: T) -> Ptr<T> {
    // The annotation turns the `&mut T` produced by `Box::leak` into the shared
    // `'static` reference that is handed to `Ptr::from`.
    let forever: &'static T = Box::leak(gbox(val));
    Ptr::from(forever)
}