209 lines
6.3 KiB
Rust
209 lines
6.3 KiB
Rust
extern crate alloc;
|
|
|
|
use crate::utils::ptr::Ptr;
|
|
use alloc::alloc::Global;
|
|
use alloc::boxed::Box;
|
|
use alloc::vec::Vec;
|
|
use core::alloc::{AllocError, Allocator, Layout};
|
|
use core::ptr::NonNull;
|
|
|
|
/// CPU fallback allocator: a zero-sized handle whose `Allocator`
/// implementation forwards every request to the global allocator.
#[derive(Clone, Copy, Debug, Default)]
pub struct SystemAlloc;
|
|
|
|
// SAFETY: every method forwards its arguments unchanged to `Global`, so the
// `Allocator` contract is upheld exactly as `Global` upholds it. `SystemAlloc`
// carries no state, so all copies of it are interchangeable.
unsafe impl Allocator for SystemAlloc {
    /// Allocates a block for `layout` from the global allocator.
    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        Global.allocate(layout)
    }

    /// Allocates a zero-initialized block for `layout`.
    ///
    /// Forwarded explicitly: the trait's provided implementation would call
    /// `allocate` and then zero the block by hand, bypassing whatever zeroing
    /// path `Global` itself offers.
    fn allocate_zeroed(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        Global.allocate_zeroed(layout)
    }

    /// Returns a block to the global allocator.
    ///
    /// # Safety
    ///
    /// `ptr` must denote a block currently allocated via this allocator and
    /// `layout` must fit that block.
    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
        // SAFETY: the caller's obligations are passed through unchanged.
        unsafe { Global.deallocate(ptr, layout) }
    }

    /// Grows a block from `old_layout` to `new_layout`.
    ///
    /// # Safety
    ///
    /// Same requirements as [`Allocator::grow`].
    unsafe fn grow(
        &self,
        ptr: NonNull<u8>,
        old_layout: Layout,
        new_layout: Layout,
    ) -> Result<NonNull<[u8]>, AllocError> {
        // SAFETY: the caller's obligations are passed through unchanged.
        unsafe { Global.grow(ptr, old_layout, new_layout) }
    }

    /// Grows a block from `old_layout` to `new_layout`, zeroing the new tail.
    ///
    /// Forwarded for the same reason as `allocate_zeroed`.
    ///
    /// # Safety
    ///
    /// Same requirements as [`Allocator::grow_zeroed`].
    unsafe fn grow_zeroed(
        &self,
        ptr: NonNull<u8>,
        old_layout: Layout,
        new_layout: Layout,
    ) -> Result<NonNull<[u8]>, AllocError> {
        // SAFETY: the caller's obligations are passed through unchanged.
        unsafe { Global.grow_zeroed(ptr, old_layout, new_layout) }
    }

    /// Shrinks a block from `old_layout` to `new_layout`.
    ///
    /// # Safety
    ///
    /// Same requirements as [`Allocator::shrink`].
    unsafe fn shrink(
        &self,
        ptr: NonNull<u8>,
        old_layout: Layout,
        new_layout: Layout,
    ) -> Result<NonNull<[u8]>, AllocError> {
        // SAFETY: the caller's obligations are passed through unchanged.
        unsafe { Global.shrink(ptr, old_layout, new_layout) }
    }
}
|
|
|
|
// Unified memory via cudaMallocManaged
|
|
#[cfg(feature = "cuda")]
|
|
pub mod cuda {
|
|
use super::*;
|
|
use cust::memory::{cuda_free_unified, cuda_malloc_unified, UnifiedPointer};
|
|
|
|
#[derive(Debug, Clone, Copy, Default)]
|
|
pub struct CudaAlloc;
|
|
|
|
unsafe impl Allocator for CudaAlloc {
|
|
fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
|
|
if layout.size() == 0 {
|
|
// Zero-sized allocations: return a dangling aligned pointer
|
|
// with zero length, which is valid for ZSTs.
|
|
let ptr = NonNull::new(layout.align() as *mut u8).ok_or(AllocError)?;
|
|
return Ok(NonNull::slice_from_raw_parts(ptr, 0));
|
|
}
|
|
|
|
let ptr = cuda_malloc_unified::<u8>(layout.size()).map_err(|_| AllocError)?;
|
|
|
|
let raw = ptr.as_raw_mut();
|
|
core::mem::forget(ptr); // Arena owns the raw pointer, not the RAII wrapper
|
|
|
|
let nn = NonNull::new(raw).ok_or(AllocError)?;
|
|
Ok(NonNull::slice_from_raw_parts(nn, layout.size()))
|
|
}
|
|
|
|
unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
|
|
if layout.size() == 0 {
|
|
return;
|
|
}
|
|
let _ = cuda_free_unified(UnifiedPointer::wrap(ptr.as_ptr()));
|
|
}
|
|
}
|
|
}
|
|
|
|
// Host-visible GPU memory via gpu-allocator
|
|
|
|
#[cfg(feature = "vulkan")]
|
|
pub mod vulkan {
|
|
use super::*;
|
|
use ash::vk;
|
|
use gpu_allocator::vulkan::{
|
|
Allocation, AllocationCreateDesc, AllocationScheme, Allocator as VkAllocator,
|
|
AllocatorCreateDesc,
|
|
};
|
|
use gpu_allocator::MemoryLocation;
|
|
use parking_lot::Mutex;
|
|
use std::collections::HashMap;
|
|
use std::sync::Arc;
|
|
|
|
/// Wraps a gpu-allocator instance. Clone is cheap
|
|
#[derive(Clone)]
|
|
pub struct VulkanAlloc {
|
|
inner: Arc<Mutex<VulkanInner>>,
|
|
}
|
|
|
|
struct VulkanInner {
|
|
allocator: VkAllocator,
|
|
allocations: HashMap<usize, Allocation>,
|
|
}
|
|
|
|
impl VulkanAlloc {
|
|
pub fn new(
|
|
instance: &ash::Instance,
|
|
device: ash::Device,
|
|
physical_device: vk::PhysicalDevice,
|
|
) -> Self {
|
|
let allocator = VkAllocator::new(&AllocatorCreateDesc {
|
|
instance: instance.clone(),
|
|
device: device.clone(),
|
|
physical_device,
|
|
debug_settings: Default::default(),
|
|
buffer_device_address: false,
|
|
allocation_sizes: Default::default(),
|
|
})
|
|
.expect("Failed to create Vulkan allocator");
|
|
|
|
Self {
|
|
inner: Arc::new(Mutex::new(VulkanInner {
|
|
allocator,
|
|
allocations: HashMap::new(),
|
|
})),
|
|
}
|
|
}
|
|
}
|
|
|
|
unsafe impl Allocator for VulkanAlloc {
|
|
fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
|
|
if layout.size() == 0 {
|
|
let ptr = NonNull::new(layout.align() as *mut u8).ok_or(AllocError)?;
|
|
return Ok(NonNull::slice_from_raw_parts(ptr, 0));
|
|
}
|
|
|
|
let mut inner = self.inner.lock();
|
|
|
|
let allocation = inner
|
|
.allocator
|
|
.allocate(&AllocationCreateDesc {
|
|
name: "gvec_alloc",
|
|
requirements: vk::MemoryRequirements {
|
|
size: layout.size() as u64,
|
|
alignment: layout.align() as u64,
|
|
memory_type_bits: u32::MAX,
|
|
},
|
|
location: MemoryLocation::CpuToGpu,
|
|
linear: true,
|
|
allocation_scheme: AllocationScheme::GpuAllocatorManaged,
|
|
})
|
|
.map_err(|_| AllocError)?;
|
|
|
|
let ptr = allocation.mapped_ptr().ok_or(AllocError)?.as_ptr() as *mut u8;
|
|
|
|
let nn = NonNull::new(ptr).ok_or(AllocError)?;
|
|
inner.allocations.insert(ptr as usize, allocation);
|
|
Ok(NonNull::slice_from_raw_parts(nn, layout.size()))
|
|
}
|
|
|
|
unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
|
|
if layout.size() == 0 {
|
|
return;
|
|
}
|
|
let mut inner = self.inner.lock();
|
|
if let Some(allocation) = inner.allocations.remove(&(ptr.as_ptr() as usize)) {
|
|
inner
|
|
.allocator
|
|
.free(allocation)
|
|
.expect("Vulkan free failed");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(feature = "cuda")]
|
|
pub type GpuAlloc = cuda::CudaAlloc;
|
|
|
|
#[cfg(all(feature = "vulkan", not(feature = "cuda")))]
|
|
pub type GpuAlloc = vulkan::VulkanAlloc;
|
|
|
|
#[cfg(not(any(feature = "cuda", feature = "vulkan")))]
|
|
pub type GpuAlloc = SystemAlloc;
|
|
|
|
pub type GVec<T> = alloc::vec::Vec<T, GpuAlloc>;
|
|
pub type GBox<T> = alloc::boxed::Box<T, GpuAlloc>;
|
|
|
|
pub fn gvec<T>() -> GVec<T> {
|
|
Vec::new_in(GpuAlloc::default())
|
|
}
|
|
|
|
pub fn gvec_with_capacity<T>(cap: usize) -> GVec<T> {
|
|
Vec::with_capacity_in(cap, GpuAlloc::default())
|
|
}
|
|
|
|
pub fn gvec_from_slice<T: Clone>(slice: &[T]) -> GVec<T> {
|
|
let mut v = gvec_with_capacity(slice.len());
|
|
v.extend_from_slice(slice);
|
|
v
|
|
}
|
|
|
|
pub fn gbox<T>(value: T) -> GBox<T> {
|
|
Box::new_in(value, GpuAlloc::default())
|
|
}
|
|
|
|
pub fn leak<T: 'static>(val: T) -> Ptr<T> {
|
|
let b = gbox(val);
|
|
let leaked: &'static T = Box::leak(b);
|
|
Ptr::from(leaked)
|
|
}
|