Trying out allocate API, and just have a unified Vec and Ptr representation instead of duplicating everything and having DeviceRepr
This commit is contained in:
parent
dad7300a14
commit
fa4692bfe6
3 changed files with 565 additions and 0 deletions
160
shared/src/shapes/mesh.rs
Normal file
160
shared/src/shapes/mesh.rs
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
use crate::core::geometry::{Normal3f, Point2f, Point3f, Vector3f};
|
||||
use crate::utils::sampling::DevicePiecewiseConstant2D;
|
||||
use crate::utils::Transform;
|
||||
use crate::{Float, Gvec};
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TriangleMesh {
|
||||
pub p: GVec<Point3f>,
|
||||
pub n: GVec<Normal3f>,
|
||||
pub s: GVec<Vector3f>,
|
||||
pub uv: GVec<Point2f>,
|
||||
pub vertex_indices: GVec<i32>,
|
||||
pub face_indices: GVec<i32>,
|
||||
pub n_triangles: u32,
|
||||
pub n_vertices: u32,
|
||||
pub reverse_orientation: bool,
|
||||
pub transform_swaps_handedness: bool,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct BilinearPatchMesh {
|
||||
pub p: GVec<Point3f>,
|
||||
pub n: GVec<Normal3f>,
|
||||
pub uv: GVec<Point2f>,
|
||||
pub vertex_indices: GVec<i32>,
|
||||
pub n_patches: u32,
|
||||
pub n_vertices: u32,
|
||||
pub reverse_orientation: bool,
|
||||
pub transform_swaps_handedness: bool,
|
||||
pub image_distribution: Ptr<PiecewiseConstant2D>,
|
||||
}
|
||||
|
||||
// Manual thread-safety markers for the mesh types.
// NOTE(review): soundness depends on every field (the `GVec` buffers and, for
// `BilinearPatchMesh`, the `Ptr` field) being safe to move and share across
// threads — confirm against the `Ptr` and `GpuAlloc` definitions.
unsafe impl Send for TriangleMesh {}
|
||||
// NOTE(review): see the note on `Send` above; shared `&TriangleMesh` access must be data-race free.
unsafe impl Sync for TriangleMesh {}
|
||||
// NOTE(review): `image_distribution` is a `Ptr`; presumably that is why a manual impl is needed — verify.
unsafe impl Send for BilinearPatchMesh {}
|
||||
// NOTE(review): same assumption as the `Send` impl for `BilinearPatchMesh` — verify.
unsafe impl Sync for BilinearPatchMesh {}
|
||||
|
||||
impl TriangleMesh {
|
||||
pub fn new(
|
||||
render_from_object: &Transform,
|
||||
reverse_orientation: bool,
|
||||
vertex_indices: &[i32],
|
||||
p: &[Point3f],
|
||||
n: &[Normal3f],
|
||||
s: &[Vector3f],
|
||||
uv: &[Point2f],
|
||||
face_indices: &[i32],
|
||||
) -> Self {
|
||||
let n_triangles = (vertex_indices.len() / 3) as u32;
|
||||
let n_vertices = p.len() as u32;
|
||||
|
||||
let mut p_gvec = gvec_with_capacity(p.len());
|
||||
for pt in p {
|
||||
p_gvec.push(render_from_object.apply_to_point(*pt));
|
||||
}
|
||||
|
||||
let mut n_gvec = gvec_with_capacity(n.len());
|
||||
if !n.is_empty() {
|
||||
assert_eq!(n_vertices as usize, n.len(), "Normal count mismatch");
|
||||
for nn in n {
|
||||
let mut transformed = render_from_object.apply_to_normal(*nn);
|
||||
if reverse_orientation {
|
||||
transformed = -transformed;
|
||||
}
|
||||
n_gvec.push(transformed);
|
||||
}
|
||||
}
|
||||
|
||||
let mut s_gvec = gvec_with_capacity(s.len());
|
||||
if !s.is_empty() {
|
||||
assert_eq!(n_vertices as usize, s.len(), "Tangent count mismatch");
|
||||
for ss in s {
|
||||
s_gvec.push(render_from_object.apply_to_vector(*ss));
|
||||
}
|
||||
}
|
||||
|
||||
assert!(
|
||||
uv.is_empty() || uv.len() == n_vertices as usize,
|
||||
"UV count mismatch"
|
||||
);
|
||||
assert!(
|
||||
face_indices.is_empty() || face_indices.len() == n_triangles as usize,
|
||||
"Face index count mismatch"
|
||||
);
|
||||
|
||||
Self {
|
||||
vertex_indices: gvec_from_slice(vertex_indices),
|
||||
p: p_gvec,
|
||||
n: n_gvec,
|
||||
s: s_gvec,
|
||||
uv: gvec_from_slice(uv),
|
||||
face_indices: gvec_from_slice(face_indices),
|
||||
n_triangles,
|
||||
n_vertices,
|
||||
reverse_orientation,
|
||||
transform_swaps_handedness: render_from_object.swaps_handedness(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn positions(&self) -> &[Point3f] {
|
||||
&self.p
|
||||
}
|
||||
pub fn indices(&self) -> &[i32] {
|
||||
&self.vertex_indices
|
||||
}
|
||||
pub fn normals(&self) -> &[Normal3f] {
|
||||
&self.n
|
||||
}
|
||||
pub fn uvs(&self) -> &[Point2f] {
|
||||
&self.uv
|
||||
}
|
||||
}
|
||||
|
||||
impl BilinearPatchMesh {
|
||||
pub fn new(
|
||||
render_from_object: &Transform,
|
||||
reverse_orientation: bool,
|
||||
vertex_indices: &[i32],
|
||||
p: &[Point3f],
|
||||
n: &[Normal3f],
|
||||
uv: &[Point2f],
|
||||
image_distribution: Option<PiecewiseConstant2D>,
|
||||
) -> Self {
|
||||
let n_patches = (vertex_indices.len() / 4) as u32;
|
||||
let n_vertices = p.len() as u32;
|
||||
|
||||
let mut p_gvec = gvec_with_capacity(p.len());
|
||||
for pt in p {
|
||||
p_gvec.push(render_from_object.apply_to_point(*pt));
|
||||
}
|
||||
|
||||
let mut n_gvec = gvec_with_capacity(n.len());
|
||||
if !n.is_empty() {
|
||||
assert_eq!(n_vertices as usize, n.len());
|
||||
for nn in n {
|
||||
let mut transformed = render_from_object.apply_to_normal(*nn);
|
||||
if reverse_orientation {
|
||||
transformed = -transformed;
|
||||
}
|
||||
n_gvec.push(transformed);
|
||||
}
|
||||
}
|
||||
|
||||
assert!(uv.is_empty() || uv.len() == n_vertices as usize);
|
||||
|
||||
Self {
|
||||
vertex_indices: gvec_from_slice(vertex_indices),
|
||||
p: p_gvec,
|
||||
n: n_gvec,
|
||||
uv: gvec_from_slice(uv),
|
||||
image_distribution: Ptr::from(image_distribution),
|
||||
n_patches,
|
||||
n_vertices,
|
||||
reverse_orientation,
|
||||
transform_swaps_handedness: render_from_object.swaps_handedness(),
|
||||
}
|
||||
}
|
||||
}
|
||||
238
shared/src/utils/alloc.rs
Normal file
238
shared/src/utils/alloc.rs
Normal file
|
|
@ -0,0 +1,238 @@
|
|||
#![feature(allocator_api)]
|
||||
extern crate alloc;
|
||||
|
||||
use alloc::alloc::Global;
|
||||
use core::alloc::{AllocError, Allocator, Layout};
|
||||
use core::ptr::NonNull;
|
||||
|
||||
// CPU fallback, delegates to Global
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub struct SystemAlloc;
|
||||
|
||||
unsafe impl Allocator for SystemAlloc {
|
||||
fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
|
||||
Global.allocate(layout)
|
||||
}
|
||||
|
||||
unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
|
||||
Global.deallocate(ptr, layout)
|
||||
}
|
||||
|
||||
unsafe fn grow(
|
||||
&self,
|
||||
ptr: NonNull<u8>,
|
||||
old_layout: Layout,
|
||||
new_layout: Layout,
|
||||
) -> Result<NonNull<[u8]>, AllocError> {
|
||||
Global.grow(ptr, old_layout, new_layout)
|
||||
}
|
||||
|
||||
unsafe fn shrink(
|
||||
&self,
|
||||
ptr: NonNull<u8>,
|
||||
old_layout: Layout,
|
||||
new_layout: Layout,
|
||||
) -> Result<NonNull<[u8]>, AllocError> {
|
||||
Global.shrink(ptr, old_layout, new_layout)
|
||||
}
|
||||
}
|
||||
|
||||
// Unified memory via cudaMallocManaged
|
||||
|
||||
#[cfg(feature = "cuda")]
pub mod cuda {
    use super::*;
    use cust::memory::{cuda_free_unified, cuda_malloc_unified, UnifiedPointer};

    /// Stateless allocator that serves requests from CUDA unified memory
    /// through `cuda_malloc_unified` / `cuda_free_unified`.
    #[derive(Debug, Clone, Copy, Default)]
    pub struct CudaAlloc;

    // SAFETY: `allocate` only returns blocks of at least `layout.size()` bytes
    // whose address satisfies `layout.align()` (checked below), and the
    // allocator is a stateless unit type, so every copy behaves identically.
    unsafe impl Allocator for CudaAlloc {
        /// Allocates `layout.size()` bytes of unified memory.
        ///
        /// Zero-sized requests never reach the driver: they get a dangling,
        /// suitably aligned pointer with length 0. Returns `AllocError` when
        /// the driver call fails, returns null, or returns an address that
        /// does not satisfy `layout.align()`.
        fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
            if layout.size() == 0 {
                // `align()` is never zero, so this is a valid dangling pointer.
                let dangling = NonNull::new(layout.align() as *mut u8).ok_or(AllocError)?;
                return Ok(NonNull::slice_from_raw_parts(dangling, 0));
            }

            // SAFETY: the requested size is non-zero, and the `Allocator`
            // contract allows the returned memory to be uninitialized.
            let mut unified =
                unsafe { cuda_malloc_unified::<u8>(layout.size()) }.map_err(|_| AllocError)?;

            // `UnifiedPointer` is used here as a plain pointer wrapper;
            // ownership of the block is tracked by the raw pointer from now on
            // and it is released in `deallocate`.
            let raw = unified.as_raw_mut();
            let block = NonNull::new(raw).ok_or(AllocError)?;

            // The driver API takes no alignment argument, so verify the
            // address instead of assuming it; hand the block back on mismatch.
            if (raw as usize) % layout.align() != 0 {
                // SAFETY: `raw` was just returned by `cuda_malloc_unified` and
                // has not been exposed to anyone else.
                let _ = unsafe { cuda_free_unified(UnifiedPointer::wrap(raw)) };
                return Err(AllocError);
            }

            Ok(NonNull::slice_from_raw_parts(block, layout.size()))
        }

        /// Releases a block previously returned by [`CudaAlloc::allocate`].
        ///
        /// Zero-sized layouts were never backed by driver memory and are
        /// ignored. A failing free is dropped silently because `deallocate`
        /// has no way to report it.
        unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
            if layout.size() == 0 {
                return;
            }
            // SAFETY: per the `Allocator::deallocate` contract `ptr` denotes a
            // live block from this allocator, i.e. one obtained from
            // `cuda_malloc_unified`.
            let _ = unsafe { cuda_free_unified(UnifiedPointer::wrap(ptr.as_ptr())) };
        }
    }
}
|
||||
|
||||
// Host-visible GPU memory via gpu-allocator
|
||||
|
||||
#[cfg(feature = "vulkan")]
pub mod vulkan {
    use super::*;
    use ash::vk;
    use gpu_allocator::vulkan::{
        Allocation, AllocationCreateDesc, AllocationScheme, Allocator as VkAllocator,
        AllocatorCreateDesc,
    };
    use gpu_allocator::MemoryLocation;
    use parking_lot::Mutex;
    use std::collections::HashMap;
    use std::sync::Arc;

    /// Wraps a gpu-allocator instance. Cloning only bumps the `Arc`, so all
    /// clones share the same underlying allocator and bookkeeping table.
    ///
    /// NOTE(review): this type has no `Default` impl, while the `gvec*` /
    /// `gbox` helpers call `GpuAlloc::default()`; with the `vulkan` backend
    /// selected those helpers need an allocator instance from somewhere.
    #[derive(Clone)]
    pub struct VulkanAlloc {
        inner: Arc<Mutex<VulkanInner>>,
    }

    /// State guarded by the mutex in [`VulkanAlloc`].
    struct VulkanInner {
        allocator: VkAllocator,
        /// Live allocations keyed by the address of their mapped pointer, so
        /// `deallocate` can recover the `Allocation` handle from a raw pointer.
        allocations: HashMap<usize, Allocation>,
    }

    impl VulkanAlloc {
        /// Creates the gpu-allocator backend for the given Vulkan handles.
        ///
        /// # Panics
        ///
        /// Panics if gpu-allocator fails to initialize.
        pub fn new(
            instance: &ash::Instance,
            device: ash::Device,
            physical_device: vk::PhysicalDevice,
        ) -> Self {
            let allocator = VkAllocator::new(&AllocatorCreateDesc {
                instance: instance.clone(),
                // `device` is owned and not used again, so it is moved rather than cloned.
                device,
                physical_device,
                debug_settings: Default::default(),
                buffer_device_address: false,
                allocation_sizes: Default::default(),
            })
            .expect("Failed to create Vulkan allocator");

            Self {
                inner: Arc::new(Mutex::new(VulkanInner {
                    allocator,
                    allocations: HashMap::new(),
                })),
            }
        }
    }

    unsafe impl Allocator for VulkanAlloc {
        /// Allocates a `CpuToGpu` block and returns its mapped host pointer.
        ///
        /// Zero-sized requests get a dangling aligned pointer with length 0.
        /// Returns `AllocError` when gpu-allocator fails or the resulting
        /// allocation is not host-mapped; in the latter case the allocation is
        /// returned to gpu-allocator before bailing out so it is not leaked.
        fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
            if layout.size() == 0 {
                let dangling = NonNull::new(layout.align() as *mut u8).ok_or(AllocError)?;
                return Ok(NonNull::slice_from_raw_parts(dangling, 0));
            }

            let mut inner = self.inner.lock();

            let allocation = inner
                .allocator
                .allocate(&AllocationCreateDesc {
                    name: "gvec_alloc",
                    requirements: vk::MemoryRequirements {
                        size: layout.size() as u64,
                        alignment: layout.align() as u64,
                        memory_type_bits: u32::MAX,
                    },
                    location: MemoryLocation::CpuToGpu,
                    linear: true,
                    allocation_scheme: AllocationScheme::GpuAllocatorManaged,
                })
                .map_err(|_| AllocError)?;

            let Some(mapped) = allocation.mapped_ptr() else {
                // Without a host mapping the block is unusable here; give it
                // back instead of dropping the handle.
                let _ = inner.allocator.free(allocation);
                return Err(AllocError);
            };

            let base = mapped.cast::<u8>();
            inner.allocations.insert(base.as_ptr() as usize, allocation);
            Ok(NonNull::slice_from_raw_parts(base, layout.size()))
        }

        /// Frees the allocation registered for `ptr`.
        ///
        /// Zero-sized layouts and pointers that are not in the bookkeeping
        /// table are ignored.
        ///
        /// # Panics
        ///
        /// Panics if gpu-allocator reports a failure while freeing.
        unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
            if layout.size() == 0 {
                return;
            }
            let mut inner = self.inner.lock();
            let registered = inner.allocations.remove(&(ptr.as_ptr() as usize));
            if let Some(allocation) = registered {
                inner
                    .allocator
                    .free(allocation)
                    .expect("Vulkan free failed");
            }
        }
    }
}
|
||||
|
||||
// Backend selection: exactly one `GpuAlloc` alias is compiled in.
// With the `cuda` feature enabled, the CUDA allocator is used.
#[cfg(feature = "cuda")]
|
||||
pub type GpuAlloc = cuda::CudaAlloc;
|
||||
|
||||
// `vulkan` is only chosen when `cuda` is not enabled as well.
#[cfg(all(feature = "vulkan", not(feature = "cuda")))]
|
||||
pub type GpuAlloc = vulkan::VulkanAlloc;
|
||||
|
||||
// Neither GPU feature enabled: fall back to `SystemAlloc`.
#[cfg(not(any(feature = "cuda", feature = "vulkan")))]
|
||||
pub type GpuAlloc = SystemAlloc;
|
||||
|
||||
// `Vec` parameterized with the selected allocator.
pub type GVec<T> = alloc::vec::Vec<T, GpuAlloc>;
|
||||
// `Box` parameterized with the selected allocator.
pub type GBox<T> = alloc::boxed::Box<T, GpuAlloc>;
|
||||
|
||||
pub fn gvec<T>() -> GVec<T> {
|
||||
Vec::new_in(GpuAlloc::default())
|
||||
}
|
||||
|
||||
pub fn gvec_with_capacity<T>(cap: usize) -> GVec<T> {
|
||||
Vec::with_capacity_in(cap, GpuAlloc::default())
|
||||
}
|
||||
|
||||
pub fn gvec_from_slice<T: Clone>(slice: &[T]) -> GVec<T> {
|
||||
let mut v = gvec_with_capacity(slice.len());
|
||||
v.extend_from_slice(slice);
|
||||
v
|
||||
}
|
||||
|
||||
pub fn gbox<T>(value: T) -> GBox<T> {
|
||||
Box::new_in(value, GpuAlloc::default())
|
||||
}
|
||||
167
src/utils/upload.rs
Normal file
167
src/utils/upload.rs
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
use crate::core::image::Image;
|
||||
use crate::spectra::DenselySampledSpectrumBuffer;
|
||||
use crate::Arena;
|
||||
use shared::core::color::RGBToSpectrumTable;
|
||||
use shared::core::image::DeviceImage;
|
||||
use shared::core::light::Light;
|
||||
use shared::core::material::Material;
|
||||
use shared::core::shape::Shape;
|
||||
use shared::core::spectrum::Spectrum;
|
||||
use shared::spectra::{DenselySampledSpectrum, DeviceStandardColorSpaces, RGBColorSpace};
|
||||
use shared::Ptr;
|
||||
use std::slice::from_raw_parts;
|
||||
|
||||
pub trait DeviceRepr {
|
||||
/// The `#[repr(C)] Copy` device-side struct.
|
||||
type Target: Copy;
|
||||
|
||||
/// Upload into the arena and return the device struct by value.
|
||||
/// Use this when embedding the result inline in another device struct.
|
||||
fn upload_value(&self, arena: &Arena) -> Self::Target;
|
||||
|
||||
/// Upload into the arena and return a Ptr to the device struct.
|
||||
/// This is the common entry point — allocates the Target in the arena.
|
||||
fn upload(&self, arena: &Arena) -> Ptr<Self::Target> {
|
||||
let value = self.upload_value(arena);
|
||||
arena.alloc(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: DeviceRepr> DeviceRepr for Option<T> {
|
||||
type Target = T::Target;
|
||||
|
||||
fn upload_value(&self, arena: &Arena) -> Self::Target {
|
||||
match self {
|
||||
Some(val) => val.upload_value(arena),
|
||||
None => panic!("Cannot upload_value on None — use upload() which returns Ptr::null()"),
|
||||
}
|
||||
}
|
||||
|
||||
fn upload(&self, arena: &Arena) -> Ptr<Self::Target> {
|
||||
match self {
|
||||
Some(val) => val.upload(arena),
|
||||
None => Ptr::null(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: DeviceRepr> DeviceRepr for std::sync::Arc<T> {
|
||||
type Target = T::Target;
|
||||
|
||||
fn upload_value(&self, arena: &Arena) -> Self::Target {
|
||||
(**self).upload_value(arena)
|
||||
}
|
||||
|
||||
fn upload(&self, arena: &Arena) -> Ptr<Self::Target> {
|
||||
(**self).upload(arena)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: DeviceRepr> DeviceRepr for Box<T> {
|
||||
type Target = T::Target;
|
||||
|
||||
fn upload_value(&self, arena: &Arena) -> Self::Target {
|
||||
(**self).upload_value(arena)
|
||||
}
|
||||
|
||||
fn upload(&self, arena: &Arena) -> Ptr<Self::Target> {
|
||||
(**self).upload(arena)
|
||||
}
|
||||
}
|
||||
|
||||
impl DeviceRepr for Shape {
|
||||
type Target = Shape;
|
||||
fn upload_value(&self, _arena: &Arena) -> Shape {
|
||||
self.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl DeviceRepr for Light {
|
||||
type Target = Light;
|
||||
fn upload_value(&self, _arena: &Arena) -> Light {
|
||||
self.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl DeviceRepr for Spectrum {
|
||||
type Target = Spectrum;
|
||||
fn upload_value(&self, _arena: &Arena) -> Spectrum {
|
||||
self.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl DeviceRepr for Material {
|
||||
type Target = Material;
|
||||
fn upload_value(&self, _arena: &Arena) -> Material {
|
||||
self.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl DeviceRepr for Image {
|
||||
type Target = DeviceImage;
|
||||
fn upload_value(&self, _arena: &Arena) -> DeviceImage {
|
||||
*self.device()
|
||||
}
|
||||
}
|
||||
|
||||
impl DeviceRepr for DenselySampledSpectrumBuffer {
|
||||
type Target = DenselySampledSpectrum;
|
||||
fn upload_value(&self, _arena: &Arena) -> DenselySampledSpectrum {
|
||||
self.device()
|
||||
}
|
||||
}
|
||||
|
||||
impl DeviceRepr for RGBToSpectrumTable {
|
||||
type Target = RGBToSpectrumTable;
|
||||
|
||||
fn upload_value(&self, arena: &Arena) -> RGBToSpectrumTable {
|
||||
let n_nodes = self.n_nodes as usize;
|
||||
|
||||
// Safety: these Ptrs point into static or previously-uploaded data;
|
||||
// we're copying the contents into the arena for a new lifetime.
|
||||
let z_slice = unsafe { from_raw_parts(self.z_nodes.as_raw(), n_nodes) };
|
||||
let (z_ptr, _) = arena.alloc_slice(z_slice);
|
||||
|
||||
let n_coeffs = 3 * n_nodes.pow(3);
|
||||
let coeffs_slice = unsafe { from_raw_parts(self.coeffs.as_raw(), n_coeffs) };
|
||||
let (c_ptr, _) = arena.alloc_slice(coeffs_slice);
|
||||
|
||||
RGBToSpectrumTable {
|
||||
z_nodes: z_ptr,
|
||||
coeffs: c_ptr,
|
||||
n_nodes: self.n_nodes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DeviceRepr for RGBColorSpace {
|
||||
type Target = RGBColorSpace;
|
||||
|
||||
fn upload_value(&self, arena: &Arena) -> RGBColorSpace {
|
||||
let table_ptr = self.rgb_to_spectrum_table.upload(arena);
|
||||
|
||||
RGBColorSpace {
|
||||
r: self.r,
|
||||
g: self.g,
|
||||
b: self.b,
|
||||
w: self.w,
|
||||
illuminant: self.illuminant.clone(),
|
||||
rgb_to_spectrum_table: table_ptr,
|
||||
xyz_from_rgb: self.xyz_from_rgb,
|
||||
rgb_from_xyz: self.rgb_from_xyz,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DeviceRepr for DeviceStandardColorSpaces {
|
||||
type Target = DeviceStandardColorSpaces;
|
||||
|
||||
fn upload_value(&self, arena: &Arena) -> DeviceStandardColorSpaces {
|
||||
DeviceStandardColorSpaces {
|
||||
srgb: self.srgb.upload(arena),
|
||||
dci_p3: self.dci_p3.upload(arena),
|
||||
rec2020: self.rec2020.upload(arena),
|
||||
aces2065_1: self.aces2065_1.upload(arena),
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in a new issue