Hitting myself, and going for a trait for uploading objs to GPU. Keep forgetting to actually upload them

This commit is contained in:
Wito Wiala 2026-05-16 02:36:20 +01:00
parent 645556da22
commit dad7300a14
20 changed files with 1917 additions and 782 deletions

View file

@ -13,13 +13,13 @@ pub struct FilterSample {
#[repr(C)]
#[derive(Clone, Debug, Copy)]
pub struct FilterSampler {
pub struct DeviceFilterSampler {
pub domain: Bounds2f,
pub distrib: DevicePiecewiseConstant2D,
pub f: DeviceArray2D<Float>,
}
impl FilterSampler {
impl DeviceFilterSampler {
pub fn sample(&self, u: Point2f) -> FilterSample {
let (p, pdf, pi) = self.distrib.sample(u);
@ -38,7 +38,7 @@ pub trait FilterTrait {
fn radius(&self) -> Vector2f;
fn evaluate(&self, p: Point2f) -> Float;
fn integral(&self) -> Float;
fn sample(&self, u: Point2f) -> FilterSample;
fn sample(&self, u: Point2f) -> DeviceFilterSample;
}
#[repr(C)]

View file

@ -205,6 +205,7 @@ impl PrimitiveTrait for KdTreeAggregate {
}
}
#[repr(C)]
#[derive(Clone, Debug, Copy)]
#[enum_dispatch(PrimitiveTrait)]
pub enum Primitive {

View file

@ -1,5 +1,5 @@
use crate::Float;
use crate::core::filter::{FilterSample, FilterTrait};
use crate::core::filter::{DeviceFilterSample, FilterTrait};
use crate::core::geometry::{Point2f, Vector2f};
use crate::utils::math::lerp;
@ -31,7 +31,7 @@ impl FilterTrait for BoxFilter {
(2.0 * self.radius.x()) * (2.0 * self.radius.y())
}
fn sample(&self, u: Point2f) -> FilterSample {
fn sample(&self, u: Point2f) -> DeviceFilterSample {
let p = Point2f::new(
lerp(u[0], -self.radius.x(), self.radius.x()),
lerp(u[1], -self.radius.y(), self.radius.y()),

View file

@ -1,5 +1,5 @@
use crate::Float;
use crate::core::filter::{FilterSample, FilterSampler, FilterTrait};
use crate::core::filter::{DeviceFilterSample, FilterSampler, FilterTrait};
use crate::core::geometry::{Point2f, Vector2f};
use crate::utils::math::{gaussian, gaussian_integral};
@ -30,7 +30,7 @@ impl FilterTrait for GaussianFilter {
- 2.0 * self.radius.y() * self.exp_y)
}
fn sample(&self, u: Point2f) -> FilterSample {
fn sample(&self, u: Point2f) -> DeviceFilterSample {
self.sampler.sample(u)
}
}

View file

@ -1,5 +1,5 @@
use crate::Float;
use crate::core::filter::{FilterSample, FilterSampler, FilterTrait};
use crate::core::filter::{DeviceFilterSample, FilterSampler, FilterTrait};
use crate::core::geometry::{Point2f, Vector2f};
use crate::utils::math::{lerp, windowed_sinc};
@ -26,7 +26,7 @@ impl FilterTrait for LanczosSincFilter {
self.integral
}
fn sample(&self, u: Point2f) -> FilterSample {
fn sample(&self, u: Point2f) -> DeviceFilterSample {
self.sampler.sample(u)
}
}

View file

@ -1,5 +1,5 @@
use crate::Float;
use crate::core::filter::{FilterSample, FilterSampler, FilterTrait};
use crate::core::filter::{DeviceFilterSample, FilterSampler, FilterTrait};
use crate::core::geometry::{Point2f, Vector2f};
use num_traits::Float as NumFloat;
@ -9,7 +9,7 @@ pub struct MitchellFilter {
pub radius: Vector2f,
pub b: Float,
pub c: Float,
pub sampler: FilterSampler,
pub sampler: DeviceFilterSampler,
}
impl MitchellFilter {
@ -50,7 +50,7 @@ impl FilterTrait for MitchellFilter {
self.radius.x() * self.radius.y() / 4.0
}
fn sample(&self, u: Point2f) -> FilterSample {
fn sample(&self, u: Point2f) -> DeviceFilterSample {
self.sampler.sample(u)
}
}

View file

@ -1,5 +1,5 @@
use crate::Float;
use crate::core::filter::{FilterSample, FilterTrait};
use crate::core::filter::{DeviceFilterSample, FilterTrait};
use crate::core::geometry::{Point2f, Vector2f};
use crate::utils::math::sample_tent;
use num_traits::Float as NumFloat;
@ -29,11 +29,11 @@ impl FilterTrait for TriangleFilter {
self.radius.x().powi(2) * self.radius.y().powi(2)
}
fn sample(&self, u: Point2f) -> FilterSample {
fn sample(&self, u: Point2f) -> DeviceFilterSample {
let p = Point2f::new(
sample_tent(u[0], self.radius.x()),
sample_tent(u[1], self.radius.y()),
);
FilterSample { p, weight: 1.0 }
DeviceFilterSample { p, weight: 1.0 }
}
}

View file

@ -18,6 +18,15 @@ pub use options::PBRTOptions;
pub use ptr::Ptr;
pub use transform::{AnimatedTransform, Transform, TransformGeneric};
use proc_macro::TokenStream;
use proc_macro2::TokenStream as TokenStream2;
use quote::{format_ident, quote};
use syn::{
parse_macro_input, Attribute, Data, DeriveInput, Expr, Fields, GenericArgument, Ident, Lit,
PathArguments, Type, Variant,
};
use crate::Float;
use core::sync::atomic::{AtomicU32, Ordering};
@ -128,3 +137,515 @@ pub fn gpu_array_from_fn<T, const N: usize>(mut f: impl FnMut(usize) -> T) -> [T
arr.assume_init()
}
}
/// # Enum variant attributes
///
/// | Attribute | Effect |
/// |-----------|--------|
/// | *(none)* | Inner type has `DeviceRepr`; auto-call `upload_value` |
/// | `#[device(clone)]` | Same type on both sides, just clone |
/// | `#[device(custom = "method")]` | You provide `fn method(inner: &T, arena) -> DeviceT` |
/// | `#[device(variant_type = "T")]` | Override the device-side variant's inner type |
///
/// # Container attribute
///
/// `#[device(name = "DeviceFoo")]` — override the generated type name (default: `Device{Name}`).
#[proc_macro_derive(Device, attributes(device))]
pub fn derive_device(input: TokenStream) -> TokenStream {
let input = parse_macro_input!(input as DeriveInput);
match derive_impl(input) {
Ok(tokens) => tokens.into(),
Err(e) => e.to_compile_error().into(),
}
}
fn derive_impl(input: DeriveInput) -> syn::Result<TokenStream2> {
match &input.data {
Data::Struct(_) => derive_struct(input),
Data::Enum(_) => derive_enum(input),
Data::Union(_) => Err(syn::Error::new_spanned(
&input.ident,
"Device derive does not support unions",
)),
}
}
// Struct derivation
fn derive_struct(input: DeriveInput) -> syn::Result<TokenStream2> {
let host_name = &input.ident;
let vis = &input.vis;
let device_name = get_device_name(&input.attrs, host_name)?;
let fields = match &input.data {
Data::Struct(s) => match &s.fields {
Fields::Named(named) => &named.named,
_ => {
return Err(syn::Error::new_spanned(
host_name,
"Device derive only supports structs with named fields",
))
}
},
_ => unreachable!(),
};
let mut device_fields = Vec::new();
let mut upload_stmts = Vec::new();
let mut device_field_inits = Vec::new();
let mut spread_expr: Option<Expr> = None;
for field in fields {
let field_name = field.ident.as_ref().unwrap();
let attrs = parse_field_attrs(&field.attrs)?;
if attrs.skip {
continue;
}
if let Some(ref expr_str) = attrs.spread {
spread_expr = Some(syn::parse_str(expr_str).map_err(|e| {
syn::Error::new_spanned(field, format!("invalid device(spread): {}", e))
})?);
continue;
}
if let Some(expr_str) = &attrs.expr {
let expr: Expr = syn::parse_str(expr_str).map_err(|e| {
syn::Error::new_spanned(field, format!("invalid device(expr): {}", e))
})?;
let ty = &field.ty;
device_fields.push(quote! { pub #field_name: #ty });
upload_stmts.push(quote! { let #field_name = #expr; });
device_field_inits.push(quote! { #field_name });
continue;
}
match classify_type(&field.ty) {
FieldClass::VecCopy(inner_ty) => {
let len_name = format_ident!("{}_len", field_name);
device_fields.push(quote! { pub #field_name: Ptr<#inner_ty> });
device_fields.push(quote! { pub #len_name: usize });
upload_stmts.push(quote! {
let (#field_name, #len_name) = arena.alloc_slice(&self.#field_name);
});
device_field_inits.push(quote! { #field_name });
device_field_inits.push(quote! { #len_name });
}
FieldClass::VecUploadable(inner_ty) => {
let len_name = format_ident!("{}_len", field_name);
device_fields.push(quote! {
pub #field_name: Ptr<<#inner_ty as DeviceRepr>::Target>
});
device_fields.push(quote! { pub #len_name: usize });
upload_stmts.push(quote! {
let __up: Vec<<#inner_ty as DeviceRepr>::Target> = self.#field_name
.iter()
.map(|item| DeviceRepr::upload_value(item, arena))
.collect();
let (#field_name, #len_name) = arena.alloc_slice(&__up);
});
device_field_inits.push(quote! { #field_name });
device_field_inits.push(quote! { #len_name });
}
FieldClass::Option(inner_ty) => {
device_fields.push(quote! {
pub #field_name: Ptr<<#inner_ty as DeviceRepr>::Target>
});
upload_stmts.push(quote! {
let #field_name = match &self.#field_name {
Some(val) => DeviceRepr::upload(val, arena),
None => Ptr::null(),
};
});
device_field_inits.push(quote! { #field_name });
}
FieldClass::Arc(inner_ty) => {
if attrs.flatten {
device_fields.push(quote! {
pub #field_name: <#inner_ty as DeviceRepr>::Target
});
upload_stmts.push(quote! {
let #field_name = DeviceRepr::upload_value(&*self.#field_name, arena);
});
} else {
device_fields.push(quote! {
pub #field_name: Ptr<<#inner_ty as DeviceRepr>::Target>
});
upload_stmts.push(quote! {
let #field_name = DeviceRepr::upload(&*self.#field_name, arena);
});
}
device_field_inits.push(quote! { #field_name });
}
FieldClass::Plain => {
let ty = &field.ty;
if attrs.copy_upload {
device_fields.push(quote! { pub #field_name: #ty });
upload_stmts.push(quote! {
let #field_name = self.#field_name.clone();
});
} else if attrs.flatten {
device_fields.push(quote! {
pub #field_name: <#ty as DeviceRepr>::Target
});
upload_stmts.push(quote! {
let #field_name = DeviceRepr::upload_value(&self.#field_name, arena);
});
} else if attrs.upload {
device_fields.push(quote! {
pub #field_name: Ptr<<#ty as DeviceRepr>::Target>
});
upload_stmts.push(quote! {
let #field_name = DeviceRepr::upload(&self.#field_name, arena);
});
} else {
device_fields.push(quote! { pub #field_name: #ty });
upload_stmts.push(quote! {
let #field_name = self.#field_name;
});
}
device_field_inits.push(quote! { #field_name });
}
}
}
let constructor = if let Some(spread) = spread_expr {
quote! {
#device_name {
#(#device_field_inits,)*
..#spread
}
}
} else {
quote! {
#device_name {
#(#device_field_inits,)*
}
}
};
Ok(quote! {
#[repr(C)]
#[derive(Debug, Copy, Clone)]
#vis struct #device_name {
#(#device_fields,)*
}
unsafe impl Send for #device_name {}
unsafe impl Sync for #device_name {}
impl DeviceRepr for #host_name {
type Target = #device_name;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> Self::Target {
#(#upload_stmts)*
#constructor
}
}
})
}
// Enum derivation
fn derive_enum(input: DeriveInput) -> syn::Result<TokenStream2> {
let host_name = &input.ident;
let vis = &input.vis;
let device_name = get_device_name(&input.attrs, host_name)?;
let variants = match &input.data {
Data::Enum(e) => &e.variants,
_ => unreachable!(),
};
let mut device_variants = Vec::new();
let mut match_arms = Vec::new();
for variant in variants {
let var_name = &variant.ident;
let var_attrs = parse_variant_attrs(&variant.attrs)?;
let inner_ty = get_variant_inner_type(variant)?;
// Determine the device-side inner type for this variant
let device_inner: Type = if let Some(ref ty_str) = var_attrs.variant_type {
syn::parse_str(ty_str).map_err(|e| {
syn::Error::new_spanned(variant, format!("invalid variant_type: {}", e))
})?
} else if var_attrs.clone_variant {
// clone: same type on both sides
inner_ty.clone()
} else {
// auto-upload: use DeviceRepr::Target
syn::parse_str(&format!("<{} as DeviceRepr>::Target", quote!(#inner_ty))).map_err(
|e| {
syn::Error::new_spanned(variant, format!("cannot construct Target type: {}", e))
},
)?
};
device_variants.push(quote! { #var_name(#device_inner) });
if var_attrs.clone_variant {
match_arms.push(quote! {
#host_name::#var_name(inner) => #device_name::#var_name(inner.clone())
});
} else if let Some(ref method) = var_attrs.custom {
let method_ident = format_ident!("{}", method);
match_arms.push(quote! {
#host_name::#var_name(inner) => {
#device_name::#var_name(Self::#method_ident(inner, arena))
}
});
} else {
// Default: inner implements DeviceRepr
match_arms.push(quote! {
#host_name::#var_name(inner) => {
#device_name::#var_name(DeviceRepr::upload_value(inner, arena))
}
});
}
}
Ok(quote! {
#[repr(C)]
#[derive(Debug, Copy, Clone)]
#vis enum #device_name {
#(#device_variants,)*
}
unsafe impl Send for #device_name {}
unsafe impl Sync for #device_name {}
impl DeviceRepr for #host_name {
type Target = #device_name;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> Self::Target {
match self {
#(#match_arms,)*
}
}
}
})
}
fn get_variant_inner_type(variant: &Variant) -> syn::Result<Type> {
match &variant.fields {
Fields::Unnamed(fields) if fields.unnamed.len() == 1 => {
Ok(fields.unnamed.first().unwrap().ty.clone())
}
Fields::Unit => Err(syn::Error::new_spanned(
variant,
"Device derive: enum variants must have exactly one field, e.g. Variant(Type)",
)),
_ => Err(syn::Error::new_spanned(
variant,
"Device derive: only single-field tuple variants supported, e.g. Variant(Type)",
)),
}
}
// Attribute parsing for variants
struct VariantAttrs {
clone_variant: bool,
custom: Option<String>,
variant_type: Option<String>,
}
fn parse_variant_attrs(attrs: &[Attribute]) -> syn::Result<VariantAttrs> {
let mut result = VariantAttrs {
clone_variant: false,
custom: None,
variant_type: None,
};
for attr in attrs {
if !attr.path().is_ident("device") {
continue;
}
attr.parse_nested_meta(|meta| {
if meta.path.is_ident("clone") {
result.clone_variant = true;
Ok(())
} else if meta.path.is_ident("custom") {
let value = meta.value()?;
let lit: Lit = value.parse()?;
if let Lit::Str(s) = lit {
result.custom = Some(s.value());
Ok(())
} else {
Err(meta.error("expected string literal"))
}
} else if meta.path.is_ident("variant_type") {
let value = meta.value()?;
let lit: Lit = value.parse()?;
if let Lit::Str(s) = lit {
result.variant_type = Some(s.value());
Ok(())
} else {
Err(meta.error("expected string literal"))
}
} else {
Err(meta.error("unknown device variant attribute"))
}
})?;
}
Ok(result)
}
// Attribute parsing for fields
struct FieldAttrs {
skip: bool,
expr: Option<String>,
copy_upload: bool,
flatten: bool,
upload: bool,
spread: Option<String>,
}
fn parse_field_attrs(attrs: &[Attribute]) -> syn::Result<FieldAttrs> {
let mut result = FieldAttrs {
skip: false,
expr: None,
copy_upload: false,
flatten: false,
upload: false,
spread: None,
};
for attr in attrs {
if !attr.path().is_ident("device") {
continue;
}
attr.parse_nested_meta(|meta| {
if meta.path.is_ident("skip") {
result.skip = true;
Ok(())
} else if meta.path.is_ident("expr") {
let value = meta.value()?;
let lit: Lit = value.parse()?;
if let Lit::Str(s) = lit {
result.expr = Some(s.value());
Ok(())
} else {
Err(meta.error("expected string literal"))
}
} else if meta.path.is_ident("copy_upload") {
result.copy_upload = true;
Ok(())
} else if meta.path.is_ident("flatten") {
result.flatten = true;
Ok(())
} else if meta.path.is_ident("upload") {
result.upload = true;
Ok(())
} else if meta.path.is_ident("spread") {
let value = meta.value()?;
let lit: Lit = value.parse()?;
if let Lit::Str(s) = lit {
result.spread = Some(s.value());
Ok(())
} else {
Err(meta.error("expected string literal"))
}
} else {
Err(meta.error("unknown device attribute"))
}
})?;
}
Ok(result)
}
// Container-level name attribute
fn get_device_name(attrs: &[Attribute], host_name: &Ident) -> syn::Result<Ident> {
for attr in attrs {
if !attr.path().is_ident("device") {
continue;
}
let mut name = None;
attr.parse_nested_meta(|meta| {
if meta.path.is_ident("name") {
let value = meta.value()?;
let lit: Lit = value.parse()?;
if let Lit::Str(s) = lit {
name = Some(format_ident!("{}", s.value()));
Ok(())
} else {
Err(meta.error("expected string literal"))
}
} else {
Ok(())
}
})?;
if let Some(n) = name {
return Ok(n);
}
}
Ok(format_ident!("Device{}", host_name))
}
// Type classification
enum FieldClass {
VecCopy(Type),
VecUploadable(Type),
Option(Type),
Arc(Type),
Plain,
}
fn classify_type(ty: &Type) -> FieldClass {
if let Some(inner) = extract_generic_arg(ty, "Vec") {
if is_copy_primitive(&inner) {
FieldClass::VecCopy(inner)
} else {
FieldClass::VecUploadable(inner)
}
} else if let Some(inner) = extract_generic_arg(ty, "Option") {
FieldClass::Option(inner)
} else if let Some(inner) = extract_generic_arg(ty, "Arc") {
FieldClass::Arc(inner)
} else {
FieldClass::Plain
}
}
fn extract_generic_arg(ty: &Type, wrapper: &str) -> Option<Type> {
if let Type::Path(type_path) = ty {
let seg = type_path.path.segments.last()?;
if seg.ident != wrapper {
return None;
}
if let PathArguments::AngleBracketed(args) = &seg.arguments {
if let Some(GenericArgument::Type(inner)) = args.args.first() {
return Some(inner.clone());
}
}
}
None
}
fn is_copy_primitive(ty: &Type) -> bool {
if let Type::Path(type_path) = ty {
if let Some(seg) = type_path.path.segments.last() {
let name = seg.ident.to_string();
return matches!(
name.as_str(),
"f32"
| "f64"
| "u8"
| "u16"
| "u32"
| "u64"
| "i8"
| "i16"
| "i32"
| "i64"
| "usize"
| "isize"
| "bool"
| "Float"
);
}
}
false
}

View file

@ -695,8 +695,8 @@ impl VarianceEstimator {
}
}
#[derive(Debug, Copy, Clone, Default)]
#[repr(C)]
#[derive(Debug, Copy, Clone, Default)]
pub struct PLSample {
pub p: Point2f,
pub pdf: Float,

View file

@ -1,5 +1,5 @@
use rayon::prelude::*;
use shared::core::aggregates::DeviceBVHAggregate;
use shared::core::aggregates::{{DeviceBVHAggregate, LinearBVHNode};
use shared::core::geometry::{Bounds3f, Point3f, Ray, Vector3f};
use shared::core::primitive::{Primitive, PrimitiveTrait};
use shared::core::shape::ShapeIntersection;
@ -26,14 +26,6 @@ struct BVHSplitBucket {
pub bounds: Bounds3f,
}
#[derive(Debug, Clone, Default)]
pub struct LinearBVHNode {
pub bounds: Bounds3f,
pub primitives_offset: usize,
pub n_primitives: u16,
pub axis: u8,
pub pad: u8,
}
#[derive(Debug, Clone, Copy, Default)]
struct MortonPrimitive {
@ -48,7 +40,7 @@ struct LBVHTreelet {
#[derive(Debug, Clone)]
pub struct BVHPrimitiveInfo {
primitive_number: usize, // Index into the original primitives vector
primitive_number: usize,
bounds: Bounds3f,
centroid: Point3f,
}

View file

@ -1,12 +1,13 @@
use crate::filters::*;
use crate::utils::containers::Array2D;
use crate::utils::sampling::PiecewiseConstant2D;
use crate::utils::DeviceRepr;
use crate::utils::{FileLoc, ParameterDictionary};
use anyhow::{Result, anyhow};
use shared::Float;
use shared::core::filter::{Filter, FilterSampler};
use anyhow::{anyhow, Result};
use shared::core::filter::{DeviceFilterSampler, Filter};
use shared::core::geometry::{Bounds2f, Point2f, Vector2f};
use shared::filters::*;
use shared::Float;
pub trait FilterFactory {
fn create(name: &str, params: &ParameterDictionary, loc: &FileLoc) -> Result<Filter>;
@ -54,14 +55,16 @@ impl FilterFactory for Filter {
}
}
pub trait CreateFilterSampler {
fn new<F>(radius: Vector2f, func: F) -> Self
where
F: Fn(Point2f) -> Float;
#[repr(C)]
#[derive(Clone, Debug, Copy)]
pub struct FilterSampler {
pub domain: Bounds2f,
pub distrib: PiecewiseConstant2D,
pub f: Array2D<Float>,
}
impl CreateFilterSampler for FilterSampler {
fn new<F>(radius: Vector2f, func: F) -> Self
impl FilterSampler {
pub fn new<F>(radius: Vector2f, func: F) -> Self
where
F: Fn(Point2f) -> Float,
{
@ -72,7 +75,6 @@ impl CreateFilterSampler for FilterSampler {
let nx = (32.0 * radius.x()) as i32;
let ny = (32.0 * radius.y()) as i32;
let mut f = Array2D::new_dims(nx, ny);
for y in 0..f.y_size() {
for x in 0..f.x_size() {
@ -83,11 +85,21 @@ impl CreateFilterSampler for FilterSampler {
f[(x as i32, y as i32)] = func(p);
}
}
let distrib = PiecewiseConstant2D::new_with_bounds(&f, domain);
Self {
domain,
f: *f.device(),
distrib: distrib.device,
Self { domain, distrib, f }
}
}
impl DeviceRepr for FilterSampler {
type Target = DeviceFilterSampler;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> DeviceFilterSampler {
DeviceFilterSampler {
domain: self.domain,
distrib: self.distrib.upload_value(arena),
f: self.f.upload_value(arena),
}
}
}

View file

@ -5,27 +5,26 @@ use shared::core::geometry::{Point2f, Vector2f};
use shared::filters::GaussianFilter;
use shared::utils::math::gaussian;
pub trait GaussianFilterCreator {
fn new(radius: Vector2f, sigma: Float) -> Self;
#[derive(Clone, Debug, Device)]
#[device(name = "GaussianFilter")]
pub struct GaussianFilterHost {
pub radius: Vector2f,
pub sigma: Float,
pub exp_x: Float,
pub exp_y: Float,
#[device(flatten)]
pub sampler: FilterSampler,
}
impl GaussianFilterCreator for GaussianFilter {
fn new(radius: Vector2f, sigma: Float) -> Self {
let exp_x = gaussian(radius.x(), 0., sigma);
let exp_y = gaussian(radius.y(), 0., sigma);
impl GaussianFilterHost {
pub fn new(radius: Vector2f, sigma: Float) -> Self {
let exp_x = gaussian(radius.x(), 0.0, sigma);
let exp_y = gaussian(radius.y(), 0.0, sigma);
let sampler = FilterSampler::new(radius, move |p: Point2f| {
let gx = (gaussian(p.x(), 0., sigma) - exp_x).max(0.0);
let gy = (gaussian(p.y(), 0., sigma) - exp_y).max(0.0);
let gx = (gaussian(p.x(), 0.0, sigma) - exp_x).max(0.0);
let gy = (gaussian(p.y(), 0.0, sigma) - exp_y).max(0.0);
gx * gy
});
Self {
radius,
sigma,
exp_x: gaussian(radius.x(), 0., sigma),
exp_y: gaussian(radius.y(), 0., sigma),
sampler,
}
Self { radius, sigma, exp_x, exp_y, sampler }
}
}

View file

@ -6,43 +6,35 @@ use shared::core::geometry::{Point2f, Vector2f};
use shared::filters::LanczosSincFilter;
use shared::utils::math::{lerp, windowed_sinc};
pub trait LanczosFilterCreator {
fn new(radius: Vector2f, tau: Float) -> Self;
#[derive(Clone, Debug, Device)]
#[device(name = "LanczosSincFilter")]
pub struct LanczosSincFilterHost {
pub radius: Vector2f,
pub tau: Float,
#[device(flatten)]
pub sampler: FilterSampler,
}
impl LanczosFilterCreator for LanczosSincFilter {
fn new(radius: Vector2f, tau: Float) -> Self {
let evaluate = |p: Point2f| -> Float {
impl LanczosSincFilterHost {
pub fn new(radius: Vector2f, tau: Float) -> Self {
let sampler = FilterSampler::new(radius, move |p: Point2f| {
windowed_sinc(p.x(), radius.x(), tau) * windowed_sinc(p.y(), radius.y(), tau)
};
let sampler = FilterSampler::new(radius, evaluate);
let sqrt_samples = 64;
let n_samples = sqrt_samples * sqrt_samples;
let area = (2.0 * radius.x()) * (2.0 * radius.y());
let mut sum = 0.0;
let mut rng = rand::rng();
for y in 0..sqrt_samples {
for x in 0..sqrt_samples {
let u = Point2f::new(
(x as Float + rng.random::<Float>()) / sqrt_samples as Float,
(y as Float + rng.random::<Float>()) / sqrt_samples as Float,
);
let p = Point2f::new(
lerp(u.x(), -radius.x(), radius.x()),
lerp(u.y(), -radius.y(), radius.y()),
);
sum += evaluate(p);
}
}
let integral = sum / n_samples as Float * area;
Self {
radius,
tau,
sampler,
integral,
}
});
Self { radius, tau, sampler }
}
}
fn windowed_sinc(x: Float, radius: Float, tau: Float) -> Float {
use std::f32::consts::PI;
let x = x.abs();
if x > radius {
return 0.0;
}
if x < 1e-5 {
1.0
} else {
let xpi = x * PI;
let xpit = xpi * tau;
(xpi.sin() / xpi) * (xpit.sin() / xpit)
}
}

View file

@ -4,23 +4,39 @@ use shared::core::filter::FilterSampler;
use shared::core::geometry::{Point2f, Vector2f};
use shared::filters::MitchellFilter;
pub trait MitchellFilterCreator {
fn new(radius: Vector2f, b: Float, c: Float) -> Self;
#[derive(Clone, Debug, Device)]
#[device(name = "MitchellFilter")]
pub struct MitchellFilterHost {
pub radius: Vector2f,
pub b: Float,
pub c: Float,
#[device(flatten)]
pub sampler: FilterSampler,
}
impl MitchellFilterCreator for MitchellFilter {
fn new(radius: Vector2f, b: Float, c: Float) -> Self {
impl MitchellFilterHost {
pub fn new(radius: Vector2f, b: Float, c: Float) -> Self {
let sampler = FilterSampler::new(radius, move |p: Point2f| {
let nx = 2.0 * p.x() / radius.x();
let ny = 2.0 * p.y() / radius.y();
Self::mitchell_1d_eval(b, c, nx) * Self::mitchell_1d_eval(b, c, ny)
mitchell_1d(p.x() / radius.x(), b, c) * mitchell_1d(p.y() / radius.y(), b, c)
});
Self {
radius,
b,
c,
sampler,
}
Self { radius, b, c, sampler }
}
}
fn mitchell_1d(x: Float, b: Float, c: Float) -> Float {
let x = (2.0 * x).abs();
if x <= 1.0 {
((12.0 - 9.0 * b - 6.0 * c) * x * x * x
+ (-18.0 + 12.0 * b + 6.0 * c) * x * x
+ (6.0 - 2.0 * b))
/ 6.0
} else if x <= 2.0 {
((-b - 6.0 * c) * x * x * x
+ (6.0 * b + 30.0 * c) * x * x
+ (-12.0 * b - 48.0 * c) * x
+ (8.0 * b + 24.0 * c))
/ 6.0
} else {
0.0
}
}

View file

@ -21,16 +21,18 @@ pub struct TriQuadMesh {
pub quad_indices: Vec<i32>,
}
#[derive(Debug)]
pub(crate) struct TriangleMeshStorage {
pub p: Vec<Point3f>,
pub n: Vec<Normal3f>,
pub s: Vec<Vector3f>,
pub uv: Vec<Point2f>,
pub vertex_indices: Vec<i32>,
pub face_indices: Vec<i32>,
#[derive(DeviceRepr)]
#[device(name = "DeviceTriangleMesh")]
pub struct TriangleMeshStorage {
pub vertex_indices: Vec<i32>, // → Ptr<i32> + len (always present)
pub p: Vec<Point3f>, // → Ptr<Point3f> + len (always present)
pub n: Vec<Normal3f>, // → Ptr<Normal3f> + len (empty → null Ptr, len 0)
pub s: Vec<Vector3f>, // → Ptr<Vector3f> + len
pub uv: Vec<Point2f>, // → Ptr<Point2f> + len
pub face_indices: Vec<i32>, // → Ptr<i32> + len
}
#[derive(Debug)]
pub(crate) struct BilinearMeshStorage {
pub vertex_indices: Vec<i32>,

View file

@ -1,147 +1,148 @@
use crate::core::image::Image;
use crate::core::texture::{FloatTexture, SpectrumTexture};
use crate::shapes::{BilinearPatchMesh, TriangleMesh};
use crate::spectra::DenselySampledSpectrumBuffer;
use crate::utils::backend::GpuAllocator;
use crate::utils::mipmap::MIPMap;
use crate::utils::sampling::{PiecewiseConstant2D, WindowedPiecewiseConstant2D};
use parking_lot::Mutex;
use shared::core::color::RGBToSpectrumTable;
use shared::core::image::DeviceImage;
use shared::core::light::Light;
use shared::core::material::Material;
use shared::core::shape::Shape;
use shared::core::material::Material;
use shared::core::spectrum::Spectrum;
use shared::core::texture::{GPUFloatTexture, GPUSpectrumTexture};
use shared::spectra::{DenselySampledSpectrum, DeviceStandardColorSpaces, RGBColorSpace};
use shared::textures::*;
use shared::utils::mesh::{DeviceBilinearPatchMesh, DeviceTriangleMesh};
use shared::utils::sampling::{
DevicePiecewiseConstant1D, DevicePiecewiseConstant2D, DeviceWindowedPiecewiseConstant2D,
};
use shared::utils::Ptr;
use parking_lot::Mutex;
use shared::Ptr;
use std::alloc::Layout;
use std::collections::HashMap;
use std::panic::Location;
use std::slice::from_raw_parts;
use std::sync::Arc;
pub struct Arena<A: GpuAllocator> {
struct Chunk {
ptr: *mut u8,
layout: Layout,
}
struct GpuBump<A: GpuAllocator> {
allocator: A,
inner: Mutex<ArenaInner>,
current: *mut u8,
end: *mut u8,
chunks: Vec<Chunk>,
}
struct ArenaInner {
blocks: Vec<(*mut u8, Layout)>,
current_block: *mut u8,
current_offset: usize,
current_capacity: usize,
current_align: usize,
texture_cache: HashMap<usize, u64>,
const CHUNK_SIZE: usize = 256 * 1024;
impl<A: GpuAllocator> GpuBump<A> {
fn new(allocator: A) -> Self {
Self {
allocator,
current: std::ptr::null_mut(),
end: std::ptr::null_mut(),
chunks: Vec::new(),
}
}
fn alloc<T>(&mut self, value: T) -> *mut T {
let layout = Layout::new::<T>();
let ptr = self.alloc_layout(layout) as *mut T;
unsafe { ptr.write(value) };
ptr
}
fn alloc_slice<T: Copy>(&mut self, values: &[T]) -> (*mut T, usize) {
if values.is_empty() {
return (std::ptr::null_mut(), 0);
}
let layout = Layout::array::<T>(values.len()).unwrap();
let ptr = self.alloc_layout(layout) as *mut T;
unsafe {
std::ptr::copy_nonoverlapping(values.as_ptr(), ptr, values.len());
}
(ptr, values.len())
}
fn alloc_layout(&mut self, layout: Layout) -> *mut u8 {
let size = layout.size();
let align = layout.align();
if size == 0 {
return align as *mut u8;
}
// Fast path: bump from current chunk
let start = self.current as usize;
let aligned = (start + align - 1) & !(align - 1);
let end = aligned + size;
if end <= self.end as usize {
self.current = end as *mut u8;
return aligned as *mut u8;
}
let chunk_size = if size > CHUNK_SIZE {
size.next_multiple_of(align.max(16))
} else {
CHUNK_SIZE.max(size.next_multiple_of(align.max(16)))
};
let chunk_layout = Layout::from_size_align(chunk_size, align.max(16)).unwrap();
let chunk = unsafe { self.allocator.alloc(chunk_layout) };
let caller = Location::caller();
if chunk.is_null() {
panic!(
"GpuBump OOM {} {}: chunk_size={} align={} backend={}",
caller.file(),
caller.line(),
chunk_size,
chunk_layout.align(),
std::any::type_name::<A>()
);
}
self.chunks.push(Chunk {
ptr: chunk,
layout: chunk_layout,
});
// If this object alone fills the chunk, mark it consumed and return.
if size > CHUNK_SIZE {
self.current = unsafe { chunk.add(chunk_size) };
self.end = self.current;
return chunk;
}
// Set up bump pointers inside the new chunk.
self.current = chunk;
self.end = unsafe { chunk.add(chunk_size) };
let aligned = (self.current as usize + align - 1) & !(align - 1);
self.current = (aligned + size) as *mut u8;
aligned as *mut u8
}
}
const DEFAULT_BLOCK_SIZE: usize = 256 * 1024;
impl<A: GpuAllocator> Drop for GpuBump<A> {
fn drop(&mut self) {
for chunk in self.chunks.drain(..) {
unsafe { self.allocator.dealloc(chunk.ptr, chunk.layout) };
}
}
}
unsafe impl<A: GpuAllocator> Send for GpuBump<A> {}
unsafe impl<A: GpuAllocator> Sync for GpuBump<A> {}
pub struct Arena<A: GpuAllocator> {
bump: Mutex<GpuBump<A>>,
texture_cache: Mutex<HashMap<usize, u64>>,
}
impl<A: GpuAllocator> Arena<A> {
pub fn new(allocator: A) -> Self {
Self {
allocator,
inner: Mutex::new(ArenaInner {
blocks: Vec::new(),
current_block: std::ptr::null_mut(),
current_offset: 0,
current_capacity: 0,
current_align: 1,
texture_cache: HashMap::new(),
}),
bump: Mutex::new(GpuBump::new(allocator)),
texture_cache: Mutex::new(HashMap::new()),
}
}
pub fn alloc<T>(&self, value: T) -> Ptr<T> {
let layout = Layout::new::<T>();
let mut inner = self.inner.lock();
let aligned = (inner.current_offset + layout.align() - 1) & !(layout.align() - 1);
// Checking if current block alignment is sufficient
if aligned + layout.size() > inner.current_capacity || inner.current_align < layout.align()
{
let block_size = DEFAULT_BLOCK_SIZE.max(layout.size() * 2);
let block_layout = Layout::from_size_align(block_size, layout.align().max(16)).unwrap();
let block = unsafe { self.allocator.alloc(block_layout) };
// null check
if block.is_null() {
panic!(
"Arena alloc failed at {}:{} — size={} align={}",
caller.file(),
caller.line(),
block_layout.size(),
block_layout.align()
);
}
inner.blocks.push((block, block_layout));
inner.current_block = block;
inner.current_offset = 0;
inner.current_capacity = block_size;
inner.current_align = block_layout.align(); // NEW
let aligned = (0 + layout.align() - 1) & !(layout.align() - 1);
let ptr = unsafe { inner.current_block.add(aligned) as *mut T };
unsafe { ptr.write(value) };
inner.current_offset = aligned + layout.size();
return Ptr::from_raw(ptr);
}
let ptr = unsafe { inner.current_block.add(aligned) as *mut T };
unsafe { ptr.write(value) };
inner.current_offset = aligned + layout.size();
let mut bump = self.bump.lock();
let ptr = bump.alloc(value);
Ptr::from_raw(ptr)
}
pub fn alloc_slice<T: Copy>(&self, values: &[T]) -> (Ptr<T>, usize) {
if values.is_empty() {
return (Ptr::null(), 0);
}
let layout = Layout::array::<T>(values.len()).unwrap();
let mut inner = self.inner.lock();
let aligned = (inner.current_offset + layout.align() - 1) & !(layout.align() - 1);
if aligned + layout.size() > inner.current_capacity || inner.current_align < layout.align()
{
let block_size = DEFAULT_BLOCK_SIZE.max(layout.size() * 2);
let block_layout = Layout::from_size_align(block_size, layout.align().max(16)).unwrap();
let block = unsafe { self.allocator.alloc(block_layout) };
if block.is_null() {
panic!(
"Arena allocation failed: size={} align={}",
block_layout.size(),
block_layout.align()
);
}
inner.blocks.push((block, block_layout));
inner.current_block = block;
inner.current_offset = 0;
inner.current_capacity = block_size;
inner.current_align = block_layout.align(); // NEW
let aligned = 0;
let ptr = unsafe { inner.current_block.add(aligned) as *mut T };
unsafe { std::ptr::copy_nonoverlapping(values.as_ptr(), ptr, values.len()) };
inner.current_offset = aligned + layout.size();
return (Ptr::from_raw(ptr), values.len());
}
let ptr = unsafe { inner.current_block.add(aligned) as *mut T };
unsafe { std::ptr::copy_nonoverlapping(values.as_ptr(), ptr, values.len()) };
inner.current_offset = aligned + layout.size();
(Ptr::from_raw(ptr), values.len())
}
pub fn alloc_opt<T>(&self, value: Option<T>) -> Ptr<T> {
match value {
Some(v) => self.alloc(v),
@ -149,20 +150,20 @@ impl<A: GpuAllocator> Arena<A> {
}
}
pub fn get_texture_object(&self, mipmap: &Arc<MIPMap>) -> u64 {
let key = Arc::as_ptr(mipmap) as usize;
let mut inner = self.inner.lock();
if let Some(&tex_obj) = inner.texture_cache.get(&key) {
return tex_obj;
pub fn alloc_slice<T: Copy>(&self, values: &[T]) -> (Ptr<T>, usize) {
let mut bump = self.bump.lock();
let (ptr, len) = bump.alloc_slice(values);
(Ptr::from_raw(ptr), len)
}
// TODO: Backend-specific texture object creation.
// CUDA: cudaCreateTextureObject
// Vulkan: VkImageView + VkSampler -> descriptor index
let tex_obj = 0u64;
inner.texture_cache.insert(key, tex_obj);
pub fn get_texture_object(&self, mipmap: &Arc<MIPMap>) -> u64 {
let key = Arc::as_ptr(mipmap) as usize;
let mut cache = self.texture_cache.lock();
if let Some(&tex_obj) = cache.get(&key) {
return tex_obj;
}
let tex_obj = 0u64; // TODO: backend-specific creation
cache.insert(key, tex_obj);
tex_obj
}
}
@ -173,76 +174,279 @@ impl<A: GpuAllocator + Default> Default for Arena<A> {
}
}
impl<A: GpuAllocator> Drop for Arena<A> {
fn drop(&mut self) {
let inner = self.inner.get_mut();
for (ptr, layout) in inner.blocks.drain(..) {
unsafe { self.allocator.dealloc(ptr, layout) };
}
}
}
unsafe impl<A: GpuAllocator> Send for Arena<A> {}
unsafe impl<A: GpuAllocator> Sync for Arena<A> {}
pub trait Upload {
pub trait DeviceRepr {
/// The `#[repr(C)] Copy` device-side struct.
type Target: Copy;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target>;
/// Upload into the arena and return the device struct by value.
/// Use this when embedding the result inline in another device struct.
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> Self::Target;
/// Upload into the arena and return a Ptr to the device struct.
/// This is the common entry point — allocates the Target in the arena.
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
let value = self.upload_value(arena);
arena.alloc(value)
}
}
impl Upload for Shape {
impl<T: DeviceRepr> DeviceRepr for Option<T> {
type Target = T::Target;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> Self::Target {
match self {
Some(val) => val.upload_value(arena),
None => panic!("Cannot upload_value on None — use upload() which returns Ptr::null()"),
}
}
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
match self {
Some(val) => val.upload(arena),
None => Ptr::null(),
}
}
}
impl<T: DeviceRepr> DeviceRepr for std::sync::Arc<T> {
type Target = T::Target;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> Self::Target {
(**self).upload_value(arena)
}
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
(**self).upload(arena)
}
}
impl<T: DeviceRepr> DeviceRepr for Box<T> {
type Target = T::Target;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> Self::Target {
(**self).upload_value(arena)
}
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
(**self).upload(arena)
}
}
impl DeviceRepr for Shape {
type Target = Shape;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
arena.alloc(self.clone())
fn upload_value<A: GpuAllocator>(&self, _arena: &Arena<A>) -> Shape {
self.clone()
}
}
impl Upload for Light {
impl DeviceRepr for Light {
type Target = Light;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
arena.alloc(self.clone())
fn upload_value<A: GpuAllocator>(&self, _arena: &Arena<A>) -> Light {
self.clone()
}
}
impl Upload for Image {
type Target = DeviceImage;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
arena.alloc(*self.device())
}
}
impl Upload for Spectrum {
impl DeviceRepr for Spectrum {
type Target = Spectrum;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
arena.alloc(self.clone())
fn upload_value<A: GpuAllocator>(&self, _arena: &Arena<A>) -> Spectrum {
self.clone()
}
}
impl Upload for Material {
impl DeviceRepr for Material {
type Target = Material;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
arena.alloc(self.clone())
fn upload_value<A: GpuAllocator>(&self, _arena: &Arena<A>) -> Material {
self.clone()
}
}
impl Upload for DenselySampledSpectrumBuffer {
// =============================================================================
// Image → DeviceImage
// =============================================================================
impl DeviceRepr for Image {
type Target = DeviceImage;
fn upload_value<A: GpuAllocator>(&self, _arena: &Arena<A>) -> DeviceImage {
*self.device()
}
}
// =============================================================================
// DenselySampledSpectrumBuffer → DenselySampledSpectrum
// =============================================================================
impl DeviceRepr for DenselySampledSpectrumBuffer {
type Target = DenselySampledSpectrum;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
arena.alloc(*&self.device())
fn upload_value<A: GpuAllocator>(&self, _arena: &Arena<A>) -> DenselySampledSpectrum {
self.device()
}
}
impl Upload for SpectrumTexture {
// =============================================================================
// RGBToSpectrumTable — re-uploads Ptr fields into arena
// =============================================================================
impl DeviceRepr for RGBToSpectrumTable {
type Target = RGBToSpectrumTable;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> RGBToSpectrumTable {
let n_nodes = self.n_nodes as usize;
// Safety: these Ptrs point into static or previously-uploaded data;
// we're copying the contents into the arena for a new lifetime.
let z_slice = unsafe { from_raw_parts(self.z_nodes.as_raw(), n_nodes) };
let (z_ptr, _) = arena.alloc_slice(z_slice);
let n_coeffs = 3 * n_nodes.pow(3);
let coeffs_slice = unsafe { from_raw_parts(self.coeffs.as_raw(), n_coeffs) };
let (c_ptr, _) = arena.alloc_slice(coeffs_slice);
RGBToSpectrumTable {
z_nodes: z_ptr,
coeffs: c_ptr,
n_nodes: self.n_nodes,
}
}
}
// =============================================================================
// RGBColorSpace — nested upload of spectrum table
// =============================================================================
impl DeviceRepr for RGBColorSpace {
type Target = RGBColorSpace;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> RGBColorSpace {
let table_ptr = self.rgb_to_spectrum_table.upload(arena);
RGBColorSpace {
r: self.r,
g: self.g,
b: self.b,
w: self.w,
illuminant: self.illuminant.clone(),
rgb_to_spectrum_table: table_ptr,
xyz_from_rgb: self.xyz_from_rgb,
rgb_from_xyz: self.rgb_from_xyz,
}
}
}
// =============================================================================
// DeviceStandardColorSpaces — composition of color space uploads
// =============================================================================
impl DeviceRepr for DeviceStandardColorSpaces {
type Target = DeviceStandardColorSpaces;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> DeviceStandardColorSpaces {
DeviceStandardColorSpaces {
srgb: self.srgb.upload(arena),
dci_p3: self.dci_p3.upload(arena),
rec2020: self.rec2020.upload(arena),
aces2065_1: self.aces2065_1.upload(arena),
}
}
}
// =============================================================================
// TriangleMesh → DeviceTriangleMesh
// =============================================================================
// TriangleMesh should own all its fields directly — no .device field.
// The host struct holds Vec arrays + scalar metadata. derive(Device) handles it:
//
// #[derive(Device)]
// #[device(name = "DeviceTriangleMesh")]
// pub struct TriangleMesh {
// pub vertex_indices: Vec<i32>,
// pub p: Vec<Point3f>,
// pub n: Vec<Normal3f>,
// pub s: Vec<Vector3f>,
// pub uv: Vec<Point2f>,
// pub face_indices: Vec<i32>,
// pub n_triangles: u32,
// pub reverse_orientation: bool,
// pub transform_swaps_handedness: bool,
// }
//
// Until the mesh struct is refactored to remove .device, here's a manual
// impl that lists every field. This is the MIGRATION TARGET — once TriangleMesh
// drops its .device field and puts scalars at the top level, switch to derive(Device).
impl DeviceRepr for TriangleMesh {
type Target = DeviceTriangleMesh;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> DeviceTriangleMesh {
let s = &self.storage;
let (vertex_indices_ptr, _) = arena.alloc_slice(&s.vertex_indices);
let (p_ptr, _) = arena.alloc_slice(&s.p);
let (n_ptr, _) = arena.alloc_slice(&s.n);
let (s_ptr, _) = arena.alloc_slice(&s.s);
let (uv_ptr, _) = arena.alloc_slice(&s.uv);
let (face_indices_ptr, _) = arena.alloc_slice(&s.face_indices);
DeviceTriangleMesh {
vertex_indices: vertex_indices_ptr,
p: p_ptr,
n: n_ptr,
s: s_ptr,
uv: uv_ptr,
face_indices: face_indices_ptr,
n_triangles: self.n_triangles,
reverse_orientation: self.reverse_orientation,
transform_swaps_handedness: self.transform_swaps_handedness,
}
}
}
// =============================================================================
// BilinearPatchMesh → DeviceBilinearPatchMesh
// =============================================================================
// Same as TriangleMesh: once the host struct is refactored to own scalars
// directly (no .device field), this switches to derive(Device).
impl DeviceRepr for BilinearPatchMesh {
type Target = DeviceBilinearPatchMesh;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> DeviceBilinearPatchMesh {
let s = &self.storage;
let (vertex_indices_ptr, _) = arena.alloc_slice(&s.vertex_indices);
let (p_ptr, _) = arena.alloc_slice(&s.p);
let (n_ptr, _) = arena.alloc_slice(&s.n);
let (uv_ptr, _) = arena.alloc_slice(&s.uv);
let image_dist_ptr = s.image_distribution.upload(arena);
DeviceBilinearPatchMesh {
vertex_indices: vertex_indices_ptr,
p: p_ptr,
n: n_ptr,
uv: uv_ptr,
image_distribution: image_dist_ptr,
n_patches: self.n_patches,
reverse_orientation: self.reverse_orientation,
transform_swaps_handedness: self.transform_swaps_handedness,
}
}
}
// =============================================================================
// SpectrumTexture → GPUSpectrumTexture
// =============================================================================
impl DeviceRepr for SpectrumTexture {
type Target = GPUSpectrumTexture;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
let gpu_variant = match self {
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> GPUSpectrumTexture {
match self {
SpectrumTexture::Constant(tex) => GPUSpectrumTexture::Constant(tex.clone()),
SpectrumTexture::Checkerboard(tex) => GPUSpectrumTexture::Checkerboard(tex.clone()),
SpectrumTexture::Dots(tex) => GPUSpectrumTexture::Dots(tex.clone()),
SpectrumTexture::Image(tex) => {
let tex_obj = arena.get_texture_object(&tex.base.mipmap);
let gpu_img = GPUSpectrumImageTexture {
GPUSpectrumTexture::Image(GPUSpectrumImageTexture {
mapping: tex.base.mapping,
tex_obj,
scale: tex.base.scale,
@ -255,55 +459,50 @@ impl Upload for SpectrumTexture {
.clone()
.unwrap_or_else(crate::spectra::default_colorspace),
spectrum_type: tex.spectrum_type,
};
GPUSpectrumTexture::Image(gpu_img)
})
}
SpectrumTexture::Bilerp(tex) => GPUSpectrumTexture::Bilerp(tex.clone()),
SpectrumTexture::Scaled(tex) => {
let child_ptr = tex.tex.upload(arena);
let gpu_scaled = GPUSpectrumScaledTexture {
let scale_ptr = tex.scale.upload(arena);
GPUSpectrumTexture::Scaled(GPUSpectrumScaledTexture {
tex: child_ptr,
scale: tex.scale.upload(arena),
};
GPUSpectrumTexture::Scaled(gpu_scaled)
scale: scale_ptr,
})
}
SpectrumTexture::Marble(tex) => GPUSpectrumTexture::Marble(tex.clone()),
SpectrumTexture::Mix(tex) => {
let tex1_ptr = tex.tex1.upload(arena);
let tex2_ptr = tex.tex2.upload(arena);
let amount_ptr = tex.amount.upload(arena);
let gpu_mix = GPUSpectrumMixTexture {
GPUSpectrumTexture::Mix(GPUSpectrumMixTexture {
tex1: tex1_ptr,
tex2: tex2_ptr,
amount: amount_ptr,
};
GPUSpectrumTexture::Mix(gpu_mix)
})
}
SpectrumTexture::DirectionMix(tex) => {
let tex1_ptr = tex.tex1.upload(arena);
let tex2_ptr = tex.tex2.upload(arena);
let gpu_mix = GPUSpectrumDirectionMixTexture {
GPUSpectrumTexture::DirectionMix(GPUSpectrumDirectionMixTexture {
tex1: tex1_ptr,
tex2: tex2_ptr,
dir: tex.dir,
};
GPUSpectrumTexture::DirectionMix(gpu_mix)
})
}
}
};
arena.alloc(gpu_variant)
}
}
impl Upload for FloatTexture {
// =============================================================================
// FloatTexture → GPUFloatTexture
// =============================================================================
impl DeviceRepr for FloatTexture {
type Target = GPUFloatTexture;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
let gpu_variant = match self {
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> GPUFloatTexture {
match self {
FloatTexture::Constant(tex) => GPUFloatTexture::Constant(tex.clone()),
FloatTexture::Checkerboard(tex) => GPUFloatTexture::Checkerboard(tex.clone()),
FloatTexture::Dots(tex) => GPUFloatTexture::Dots(tex.clone()),
@ -312,249 +511,40 @@ impl Upload for FloatTexture {
FloatTexture::Wrinkled(tex) => GPUFloatTexture::Wrinkled(tex.clone()),
FloatTexture::Scaled(tex) => {
let child_ptr = tex.tex.upload(arena);
let gpu_scaled = GPUFloatScaledTexture {
let scale_ptr = tex.scale.upload(arena);
GPUFloatTexture::Scaled(GPUFloatScaledTexture {
tex: child_ptr,
scale: tex.scale.upload(arena),
};
GPUFloatTexture::Scaled(gpu_scaled)
scale: scale_ptr,
})
}
FloatTexture::Mix(tex) => {
let tex1_ptr = tex.tex1.upload(arena);
let tex2_ptr = tex.tex2.upload(arena);
let amount_ptr = tex.amount.upload(arena);
let gpu_mix = GPUFloatMixTexture {
GPUFloatTexture::Mix(GPUFloatMixTexture {
tex1: tex1_ptr,
tex2: tex2_ptr,
amount: amount_ptr,
};
GPUFloatTexture::Mix(gpu_mix)
})
}
FloatTexture::DirectionMix(tex) => {
let tex1_ptr = tex.tex1.upload(arena);
let tex2_ptr = tex.tex2.upload(arena);
let gpu_dmix = GPUFloatDirectionMixTexture {
GPUFloatTexture::DirectionMix(GPUFloatDirectionMixTexture {
tex1: tex1_ptr,
tex2: tex2_ptr,
dir: tex.dir,
};
GPUFloatTexture::DirectionMix(gpu_dmix)
})
}
FloatTexture::Image(tex) => {
let gpu_image_tex = GPUFloatImageTexture {
GPUFloatTexture::Image(GPUFloatImageTexture {
mapping: tex.base.mapping,
tex_obj: tex.base.mipmap.texture_object(),
scale: tex.base.scale,
invert: tex.base.invert,
};
GPUFloatTexture::Image(gpu_image_tex)
}
FloatTexture::Bilerp(tex) => GPUFloatTexture::Bilerp(tex.clone()),
};
arena.alloc(gpu_variant)
}
}
impl Upload for RGBToSpectrumTable {
type Target = RGBToSpectrumTable;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
let n_nodes = self.n_nodes as usize;
let z_slice = unsafe { from_raw_parts(self.z_nodes.as_raw(), n_nodes) };
let (z_ptr, _) = arena.alloc_slice(z_slice);
let n_coeffs = 3 * (n_nodes as usize).pow(3);
let coeffs_slice = unsafe { from_raw_parts(self.coeffs.as_raw(), n_coeffs) };
let (c_ptr, _) = arena.alloc_slice(coeffs_slice);
arena.alloc(RGBToSpectrumTable {
z_nodes: z_ptr,
coeffs: c_ptr,
n_nodes: self.n_nodes,
})
}
}
impl Upload for RGBColorSpace {
type Target = RGBColorSpace;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
let table_ptr = self.rgb_to_spectrum_table.upload(arena);
let shared_space = RGBColorSpace {
r: self.r,
g: self.g,
b: self.b,
w: self.w,
illuminant: self.illuminant.clone(),
rgb_to_spectrum_table: table_ptr,
xyz_from_rgb: self.xyz_from_rgb,
rgb_from_xyz: self.rgb_from_xyz,
};
arena.alloc(shared_space)
}
}
impl Upload for DeviceStandardColorSpaces {
type Target = DeviceStandardColorSpaces;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
let srgb_ptr = self.srgb.upload(arena);
let dci_ptr = self.dci_p3.upload(arena);
let rec_ptr = self.rec2020.upload(arena);
let aces_ptr = self.aces2065_1.upload(arena);
let registry = DeviceStandardColorSpaces {
srgb: srgb_ptr,
dci_p3: dci_ptr,
rec2020: rec_ptr,
aces2065_1: aces_ptr,
};
arena.alloc(registry)
}
}
impl Upload for PiecewiseConstant2D {
type Target = DevicePiecewiseConstant2D;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
let marginal_shared = self.marginal.to_shared(arena);
let conditionals_shared: Vec<DevicePiecewiseConstant1D> = self
.conditionals
.iter()
.map(|c| c.to_shared(arena))
.collect();
let (conditionals_ptr, _) = arena.alloc_slice(&conditionals_shared);
let shared_2d = DevicePiecewiseConstant2D {
conditionals: conditionals_ptr,
marginal: marginal_shared,
..self.device
};
arena.alloc(shared_2d)
}
}
impl Upload for WindowedPiecewiseConstant2D {
type Target = DeviceWindowedPiecewiseConstant2D;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
let specific = DeviceWindowedPiecewiseConstant2D {
sat: self.sat,
func: self.func,
};
arena.alloc(specific)
}
}
impl Upload for TriangleMesh {
type Target = DeviceTriangleMesh;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
let storage = &self.storage;
// Upload all arrays to arena
let (vertex_indices_ptr, _) = arena.alloc_slice(&storage.vertex_indices);
let (p_ptr, _) = arena.alloc_slice(&storage.p);
let (n_ptr, _) = if storage.n.is_empty() {
(Ptr::null(), 0)
} else {
arena.alloc_slice(&storage.n)
};
let (s_ptr, _) = if storage.s.is_empty() {
(Ptr::null(), 0)
} else {
arena.alloc_slice(&storage.s)
};
let (uv_ptr, _) = if storage.uv.is_empty() {
(Ptr::null(), 0)
} else {
arena.alloc_slice(&storage.uv)
};
let (face_indices_ptr, _) = if storage.face_indices.is_empty() {
(Ptr::null(), 0)
} else {
arena.alloc_slice(&storage.face_indices)
};
let device = DeviceTriangleMesh {
vertex_indices: vertex_indices_ptr,
p: p_ptr,
n: n_ptr,
s: s_ptr,
uv: uv_ptr,
face_indices: face_indices_ptr,
..self.device
};
arena.alloc(device)
}
}
impl Upload for BilinearPatchMesh {
type Target = DeviceBilinearPatchMesh;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
let storage = &self.storage;
let (vertex_indices_ptr, _) = arena.alloc_slice(&storage.vertex_indices);
let (p_ptr, _) = arena.alloc_slice(&storage.p);
let (n_ptr, _) = if storage.n.is_empty() {
(Ptr::null(), 0)
} else {
arena.alloc_slice(&storage.n)
};
let (uv_ptr, _) = if storage.uv.is_empty() {
(Ptr::null(), 0)
} else {
arena.alloc_slice(&storage.uv)
};
let image_dist_ptr = storage.image_distribution.upload(arena);
let device = DeviceBilinearPatchMesh {
vertex_indices: vertex_indices_ptr,
p: p_ptr,
n: n_ptr,
uv: uv_ptr,
image_distribution: image_dist_ptr,
..self.device
};
arena.alloc(device)
}
}
impl<T: Upload> Upload for Option<T> {
type Target = T::Target;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
match self {
Some(val) => val.upload(arena),
None => Ptr::null(),
FloatTexture::Bilerp(tex) => GPUFloatTexture::Bilerp(tex.clone()),
}
}
}
impl<T: Upload> Upload for Arc<T> {
type Target = T::Target;
fn upload<A: GpuAllocator>(&self, arena: &Arena<A>) -> Ptr<Self::Target> {
(**self).upload(arena)
}
}

View file

@ -1,18 +1,17 @@
use std::alloc::Layout;
pub trait GpuAllocator: Send + Sync {
/// Allocate `size` bytes with given alignment.
/// Returns a host-mapped pointer.
pub trait GpuAllocator: Send + Sync + Clone {
unsafe fn alloc(&self, layout: Layout) -> *mut u8;
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout);
}
/// CPU fallback — standard system allocator.
/// CPU fallback
#[derive(Clone)]
pub struct SystemAllocator;
impl Default for SystemAllocator {
fn default() -> Self {
Self {}
Self
}
}
@ -21,101 +20,85 @@ impl GpuAllocator for SystemAllocator {
if layout.size() == 0 {
return layout.align() as *mut u8;
}
unsafe { std::alloc::alloc(layout) }
std::alloc::alloc(layout)
}
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
if layout.size() > 0 {
unsafe {
std::alloc::dealloc(ptr, layout);
}
}
}
}
/// CUDA unified memory backend using CudaAllocator
/// CUDA unified memory. Still using CudaAllocator, might move over to
#[cfg(feature = "cuda")]
pub mod cuda {
use super::GpuAllocator;
use cust::memory::{cuda_free_unified, cuda_malloc_unified, UnifiedPointer};
use std::alloc::Layout;
#[derive(Clone)]
pub struct CudaAllocator;
impl Default for CudaAllocator {
fn default() -> Self {
Self {}
Self
}
}
impl GpuAllocator for CudaAllocator {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
use cust::memory::cuda_malloc_unified;
use cust_raw::driver_sys::*;
let size = layout.size().max(layout.align());
let size = layout.size();
if size == 0 {
return layout.align() as *mut u8;
}
let mut ctx: CUcontext = std::ptr::null_mut();
cuCtxGetCurrent(&mut ctx);
if ctx.is_null() {
let mut primary: CUcontext = std::ptr::null_mut();
cuDevicePrimaryCtxRetain(&mut primary, 0);
cuCtxSetCurrent(primary);
}
let ptr = cuda_malloc_unified::<u8>(size)
.expect("cuda_malloc_unified failed — is a CUDA context current?");
let mut unified_ptr =
unsafe { cuda_malloc_unified::<u8>(size).expect("cuda_malloc_unified failed") };
let raw = unified_ptr.as_raw_mut();
std::mem::forget(unified_ptr);
let raw = ptr.as_raw_mut();
std::mem::forget(ptr); // Leak RAII wrapper; Arena owns the raw pointer
raw
}
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
use cust::memory::{UnifiedPointer, cuda_free_unified};
if layout.size() > 0 {
let _ = unsafe { cuda_free_unified(UnifiedPointer::wrap(ptr)) };
if layout.size() > 0 && !ptr.is_null() {
let _ = cuda_free_unified(UnifiedPointer::wrap(ptr));
}
}
}
}
/// Vulkan backend (gpu-allocator for now, there might be a better solution)
#[cfg(feature = "vulkan")]
pub mod vulkan {
use super::GpuAllocator;
use ash::vk;
use gpu_allocator::MemoryLocation;
use gpu_allocator::vulkan::{
Allocation, AllocationCreateDesc, AllocationScheme, Allocator, AllocatorCreateDesc,
};
use gpu_allocator::MemoryLocation;
use parking_lot::Mutex;
use std::alloc::Layout;
use std::collections::HashMap;
use std::sync::OnceLock;
use std::sync::Arc;
// So, having a static allocator seems like a terrible idea
// But I cant find a way to get a functioning generic Arena constructor
// That might not even be a necessity, since rust-gpu/rust-cuda might actually handle that
// differently
static VK_ALLOCATOR: OnceLock<VulkanAllocatorInner> = OnceLock::new();
struct VulkanAllocatorInner {
state: Mutex<VulkanState>,
#[derive(Clone)]
pub struct VulkanAllocator {
inner: Arc<Mutex<VulkanInner>>,
}
struct VulkanState {
struct VulkanInner {
device: ash::Device,
allocator: Allocator,
allocations: HashMap<usize, Allocation>,
}
pub fn init_vulkan(
impl VulkanAllocator {
pub fn new(
instance: &ash::Instance,
device: &ash::Device,
device: ash::Device,
physical_device: vk::PhysicalDevice,
) {
VK_ALLOCATOR.get_or_init(|| {
) -> Self {
let allocator = Allocator::new(&AllocatorCreateDesc {
instance: instance.clone(),
device: device.clone(),
@ -126,52 +109,29 @@ pub mod vulkan {
})
.expect("Failed to create Vulkan allocator");
VulkanAllocatorInner {
state: Mutex::new(VulkanState {
Self {
inner: Arc::new(Mutex::new(VulkanInner {
device,
allocator,
allocations: HashMap::new(),
}),
}
});
}
fn inner() -> &'static VulkanAllocatorInner {
VK_ALLOCATOR
.get()
.expect("Vulkan not initialized — call init_vulkan() before arena creation")
}
impl Default for VulkanAllocator {
fn default() -> Self {
let _ = inner();
Self
})),
}
}
}
pub struct VulkanAllocator;
// impl VulkanAllocator {
// pub fn new(allocator: Allocator) -> Self {
// Self {
// allocator: Mutex::new(allocator),
// allocations: Mutex::new(HashMap::new()),
// }
// }
// }
impl GpuAllocator for VulkanAllocator {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
let size = layout.size().max(layout.align());
let size = layout.size();
if size == 0 {
return layout.align() as *mut u8;
}
let inner = inner();
let mut state = inner.state.lock();
let allocation = state
let mut inner = self.inner.lock();
let allocation = inner
.allocator
.allocate(&AllocationCreateDesc {
name: "arena",
name: "arena_chunk",
requirements: vk::MemoryRequirements {
size: size as u64,
alignment: layout.align() as u64,
@ -188,18 +148,17 @@ pub mod vulkan {
.expect("Vulkan allocation not host-mapped")
.as_ptr() as *mut u8;
state.allocations.insert(ptr as usize, allocation);
inner.allocations.insert(ptr as usize, allocation);
ptr
}
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
if layout.size() == 0 {
if layout.size() == 0 || ptr.is_null() {
return;
}
let inner = inner();
let mut state = inner.state.lock();
if let Some(allocation) = state.allocations.remove(&(ptr as usize)) {
state
let mut inner = self.inner.lock();
if let Some(allocation) = inner.allocations.remove(&(ptr as usize)) {
inner
.allocator
.free(allocation)
.expect("Vulkan free failed");

View file

@ -34,6 +34,7 @@ where
}
#[derive(Debug, Clone)]
#[derive(DeviceRepr)]
pub struct Array2D<T> {
pub device: DeviceArray2D<T>,
pub values: Vec<T>,

View file

@ -12,7 +12,7 @@ pub mod parser;
pub mod sampling;
pub mod strings;
pub use arena::Upload;
pub use arena::DeviceRepr;
pub use error::FileLoc;
pub use file::{read_float_file, resolve_filename};
pub use parameters::{
@ -28,3 +28,516 @@ pub type Arena = arena::Arena<backend::cuda::CudaAllocator>;
#[cfg(not(any(feature = "cuda", feature = "vulkan")))]
pub type Arena = arena::Arena<backend::SystemAllocator>;
/// # Enum variant attributes
///
/// | Attribute | Effect |
/// |-----------|--------|
/// | *(none)* | Inner type has `DeviceRepr`; auto-call `upload_value` |
/// | `#[device(clone)]` | Same type on both sides, just clone |
/// | `#[device(custom = "method")]` | You provide `fn method(inner: &T, arena) -> DeviceT` |
/// | `#[device(variant_type = "T")]` | Override the device-side variant's inner type |
///
/// # Container attribute
///
/// `#[device(name = "DeviceFoo")]` — override the generated type name (default: `Device{Name}`).
#[proc_macro_derive(Device, attributes(device))]
pub fn derive_device(input: TokenStream) -> TokenStream {
let input = parse_macro_input!(input as DeriveInput);
match derive_impl(input) {
Ok(tokens) => tokens.into(),
Err(e) => e.to_compile_error().into(),
}
}
fn derive_impl(input: DeriveInput) -> syn::Result<TokenStream2> {
match &input.data {
Data::Struct(_) => derive_struct(input),
Data::Enum(_) => derive_enum(input),
Data::Union(_) => Err(syn::Error::new_spanned(
&input.ident,
"Device derive does not support unions",
)),
}
}
// Struct derivation
fn derive_struct(input: DeriveInput) -> syn::Result<TokenStream2> {
let host_name = &input.ident;
let vis = &input.vis;
let device_name = get_device_name(&input.attrs, host_name)?;
let fields = match &input.data {
Data::Struct(s) => match &s.fields {
Fields::Named(named) => &named.named,
_ => {
return Err(syn::Error::new_spanned(
host_name,
"Device derive only supports structs with named fields",
))
}
},
_ => unreachable!(),
};
let mut device_fields = Vec::new();
let mut upload_stmts = Vec::new();
let mut device_field_inits = Vec::new();
let mut spread_expr: Option<Expr> = None;
for field in fields {
let field_name = field.ident.as_ref().unwrap();
let attrs = parse_field_attrs(&field.attrs)?;
if attrs.skip {
continue;
}
if let Some(ref expr_str) = attrs.spread {
spread_expr = Some(syn::parse_str(expr_str).map_err(|e| {
syn::Error::new_spanned(field, format!("invalid device(spread): {}", e))
})?);
continue;
}
if let Some(expr_str) = &attrs.expr {
let expr: Expr = syn::parse_str(expr_str).map_err(|e| {
syn::Error::new_spanned(field, format!("invalid device(expr): {}", e))
})?;
let ty = &field.ty;
device_fields.push(quote! { pub #field_name: #ty });
upload_stmts.push(quote! { let #field_name = #expr; });
device_field_inits.push(quote! { #field_name });
continue;
}
match classify_type(&field.ty) {
FieldClass::VecCopy(inner_ty) => {
let len_name = format_ident!("{}_len", field_name);
device_fields.push(quote! { pub #field_name: Ptr<#inner_ty> });
device_fields.push(quote! { pub #len_name: usize });
upload_stmts.push(quote! {
let (#field_name, #len_name) = arena.alloc_slice(&self.#field_name);
});
device_field_inits.push(quote! { #field_name });
device_field_inits.push(quote! { #len_name });
}
FieldClass::VecUploadable(inner_ty) => {
let len_name = format_ident!("{}_len", field_name);
device_fields.push(quote! {
pub #field_name: Ptr<<#inner_ty as DeviceRepr>::Target>
});
device_fields.push(quote! { pub #len_name: usize });
upload_stmts.push(quote! {
let __up: Vec<<#inner_ty as DeviceRepr>::Target> = self.#field_name
.iter()
.map(|item| DeviceRepr::upload_value(item, arena))
.collect();
let (#field_name, #len_name) = arena.alloc_slice(&__up);
});
device_field_inits.push(quote! { #field_name });
device_field_inits.push(quote! { #len_name });
}
FieldClass::Option(inner_ty) => {
device_fields.push(quote! {
pub #field_name: Ptr<<#inner_ty as DeviceRepr>::Target>
});
upload_stmts.push(quote! {
let #field_name = match &self.#field_name {
Some(val) => DeviceRepr::upload(val, arena),
None => Ptr::null(),
};
});
device_field_inits.push(quote! { #field_name });
}
FieldClass::Arc(inner_ty) => {
if attrs.flatten {
device_fields.push(quote! {
pub #field_name: <#inner_ty as DeviceRepr>::Target
});
upload_stmts.push(quote! {
let #field_name = DeviceRepr::upload_value(&*self.#field_name, arena);
});
} else {
device_fields.push(quote! {
pub #field_name: Ptr<<#inner_ty as DeviceRepr>::Target>
});
upload_stmts.push(quote! {
let #field_name = DeviceRepr::upload(&*self.#field_name, arena);
});
}
device_field_inits.push(quote! { #field_name });
}
FieldClass::Plain => {
let ty = &field.ty;
if attrs.copy_upload {
device_fields.push(quote! { pub #field_name: #ty });
upload_stmts.push(quote! {
let #field_name = self.#field_name.clone();
});
} else if attrs.flatten {
device_fields.push(quote! {
pub #field_name: <#ty as DeviceRepr>::Target
});
upload_stmts.push(quote! {
let #field_name = DeviceRepr::upload_value(&self.#field_name, arena);
});
} else if attrs.upload {
device_fields.push(quote! {
pub #field_name: Ptr<<#ty as DeviceRepr>::Target>
});
upload_stmts.push(quote! {
let #field_name = DeviceRepr::upload(&self.#field_name, arena);
});
} else {
device_fields.push(quote! { pub #field_name: #ty });
upload_stmts.push(quote! {
let #field_name = self.#field_name;
});
}
device_field_inits.push(quote! { #field_name });
}
}
}
let constructor = if let Some(spread) = spread_expr {
quote! {
#device_name {
#(#device_field_inits,)*
..#spread
}
}
} else {
quote! {
#device_name {
#(#device_field_inits,)*
}
}
};
Ok(quote! {
#[repr(C)]
#[derive(Debug, Copy, Clone)]
#vis struct #device_name {
#(#device_fields,)*
}
unsafe impl Send for #device_name {}
unsafe impl Sync for #device_name {}
impl DeviceRepr for #host_name {
type Target = #device_name;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> Self::Target {
#(#upload_stmts)*
#constructor
}
}
})
}
// Enum derivation
fn derive_enum(input: DeriveInput) -> syn::Result<TokenStream2> {
let host_name = &input.ident;
let vis = &input.vis;
let device_name = get_device_name(&input.attrs, host_name)?;
let variants = match &input.data {
Data::Enum(e) => &e.variants,
_ => unreachable!(),
};
let mut device_variants = Vec::new();
let mut match_arms = Vec::new();
for variant in variants {
let var_name = &variant.ident;
let var_attrs = parse_variant_attrs(&variant.attrs)?;
let inner_ty = get_variant_inner_type(variant)?;
// Determine the device-side inner type for this variant
let device_inner: Type = if let Some(ref ty_str) = var_attrs.variant_type {
syn::parse_str(ty_str).map_err(|e| {
syn::Error::new_spanned(variant, format!("invalid variant_type: {}", e))
})?
} else if var_attrs.clone_variant {
// clone: same type on both sides
inner_ty.clone()
} else {
// auto-upload: use DeviceRepr::Target
syn::parse_str(&format!("<{} as DeviceRepr>::Target", quote!(#inner_ty))).map_err(
|e| {
syn::Error::new_spanned(variant, format!("cannot construct Target type: {}", e))
},
)?
};
device_variants.push(quote! { #var_name(#device_inner) });
if var_attrs.clone_variant {
match_arms.push(quote! {
#host_name::#var_name(inner) => #device_name::#var_name(inner.clone())
});
} else if let Some(ref method) = var_attrs.custom {
let method_ident = format_ident!("{}", method);
match_arms.push(quote! {
#host_name::#var_name(inner) => {
#device_name::#var_name(Self::#method_ident(inner, arena))
}
});
} else {
// Default: inner implements DeviceRepr
match_arms.push(quote! {
#host_name::#var_name(inner) => {
#device_name::#var_name(DeviceRepr::upload_value(inner, arena))
}
});
}
}
Ok(quote! {
#[repr(C)]
#[derive(Debug, Copy, Clone)]
#vis enum #device_name {
#(#device_variants,)*
}
unsafe impl Send for #device_name {}
unsafe impl Sync for #device_name {}
impl DeviceRepr for #host_name {
type Target = #device_name;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> Self::Target {
match self {
#(#match_arms,)*
}
}
}
})
}
fn get_variant_inner_type(variant: &Variant) -> syn::Result<Type> {
match &variant.fields {
Fields::Unnamed(fields) if fields.unnamed.len() == 1 => {
Ok(fields.unnamed.first().unwrap().ty.clone())
}
Fields::Unit => Err(syn::Error::new_spanned(
variant,
"Device derive: enum variants must have exactly one field, e.g. Variant(Type)",
)),
_ => Err(syn::Error::new_spanned(
variant,
"Device derive: only single-field tuple variants supported, e.g. Variant(Type)",
)),
}
}
// Attribute parsing for variants
struct VariantAttrs {
clone_variant: bool,
custom: Option<String>,
variant_type: Option<String>,
}
fn parse_variant_attrs(attrs: &[Attribute]) -> syn::Result<VariantAttrs> {
let mut result = VariantAttrs {
clone_variant: false,
custom: None,
variant_type: None,
};
for attr in attrs {
if !attr.path().is_ident("device") {
continue;
}
attr.parse_nested_meta(|meta| {
if meta.path.is_ident("clone") {
result.clone_variant = true;
Ok(())
} else if meta.path.is_ident("custom") {
let value = meta.value()?;
let lit: Lit = value.parse()?;
if let Lit::Str(s) = lit {
result.custom = Some(s.value());
Ok(())
} else {
Err(meta.error("expected string literal"))
}
} else if meta.path.is_ident("variant_type") {
let value = meta.value()?;
let lit: Lit = value.parse()?;
if let Lit::Str(s) = lit {
result.variant_type = Some(s.value());
Ok(())
} else {
Err(meta.error("expected string literal"))
}
} else {
Err(meta.error("unknown device variant attribute"))
}
})?;
}
Ok(result)
}
// Attribute parsing for fields
struct FieldAttrs {
skip: bool,
expr: Option<String>,
copy_upload: bool,
flatten: bool,
upload: bool,
spread: Option<String>,
}
fn parse_field_attrs(attrs: &[Attribute]) -> syn::Result<FieldAttrs> {
let mut result = FieldAttrs {
skip: false,
expr: None,
copy_upload: false,
flatten: false,
upload: false,
spread: None,
};
for attr in attrs {
if !attr.path().is_ident("device") {
continue;
}
attr.parse_nested_meta(|meta| {
if meta.path.is_ident("skip") {
result.skip = true;
Ok(())
} else if meta.path.is_ident("expr") {
let value = meta.value()?;
let lit: Lit = value.parse()?;
if let Lit::Str(s) = lit {
result.expr = Some(s.value());
Ok(())
} else {
Err(meta.error("expected string literal"))
}
} else if meta.path.is_ident("copy_upload") {
result.copy_upload = true;
Ok(())
} else if meta.path.is_ident("flatten") {
result.flatten = true;
Ok(())
} else if meta.path.is_ident("upload") {
result.upload = true;
Ok(())
} else if meta.path.is_ident("spread") {
let value = meta.value()?;
let lit: Lit = value.parse()?;
if let Lit::Str(s) = lit {
result.spread = Some(s.value());
Ok(())
} else {
Err(meta.error("expected string literal"))
}
} else {
Err(meta.error("unknown device attribute"))
}
})?;
}
Ok(result)
}
// Container-level name attribute
fn get_device_name(attrs: &[Attribute], host_name: &Ident) -> syn::Result<Ident> {
for attr in attrs {
if !attr.path().is_ident("device") {
continue;
}
let mut name = None;
attr.parse_nested_meta(|meta| {
if meta.path.is_ident("name") {
let value = meta.value()?;
let lit: Lit = value.parse()?;
if let Lit::Str(s) = lit {
name = Some(format_ident!("{}", s.value()));
Ok(())
} else {
Err(meta.error("expected string literal"))
}
} else {
Ok(())
}
})?;
if let Some(n) = name {
return Ok(n);
}
}
Ok(format_ident!("Device{}", host_name))
}
// Type classification
enum FieldClass {
VecCopy(Type),
VecUploadable(Type),
Option(Type),
Arc(Type),
Plain,
}
fn classify_type(ty: &Type) -> FieldClass {
if let Some(inner) = extract_generic_arg(ty, "Vec") {
if is_copy_primitive(&inner) {
FieldClass::VecCopy(inner)
} else {
FieldClass::VecUploadable(inner)
}
} else if let Some(inner) = extract_generic_arg(ty, "Option") {
FieldClass::Option(inner)
} else if let Some(inner) = extract_generic_arg(ty, "Arc") {
FieldClass::Arc(inner)
} else {
FieldClass::Plain
}
}
fn extract_generic_arg(ty: &Type, wrapper: &str) -> Option<Type> {
if let Type::Path(type_path) = ty {
let seg = type_path.path.segments.last()?;
if seg.ident != wrapper {
return None;
}
if let PathArguments::AngleBracketed(args) = &seg.arguments {
if let Some(GenericArgument::Type(inner)) = args.args.first() {
return Some(inner.clone());
}
}
}
None
}
fn is_copy_primitive(ty: &Type) -> bool {
if let Type::Path(type_path) = ty {
if let Some(seg) = type_path.path.segments.last() {
let name = seg.ident.to_string();
return matches!(
name.as_str(),
"f32"
| "f64"
| "u8"
| "u16"
| "u32"
| "u64"
| "i8"
| "i16"
| "i32"
| "i64"
| "usize"
| "isize"
| "bool"
| "Float"
);
}
}
false
}

View file

@ -13,43 +13,25 @@ use std::sync::Arc;
#[derive(Debug, Clone)]
pub struct PiecewiseConstant1D {
func: Box<[Float]>,
cdf: Box<[Float]>,
pub device: DevicePiecewiseConstant1D,
func: Vec<Float>,
cdf: Vec<Float>,
pub min: Float,
pub max: Float,
}
impl PiecewiseConstant1D {
// Constructors
pub fn new(f: &[Float]) -> Self {
Self::new_with_bounds(f.to_vec(), 0.0, 1.0)
}
pub fn to_shared<A: GpuAllocator>(&self, arena: &Arena<A>) -> DevicePiecewiseConstant1D {
let (func_ptr, _) = arena.alloc_slice(&self.func);
let (cdf_ptr, _) = arena.alloc_slice(&self.cdf);
DevicePiecewiseConstant1D {
func: func_ptr,
cdf: cdf_ptr,
func_integral: self.func_integral,
n: self.func.len() as u32,
min: self.min,
max: self.max,
}
}
pub fn from_func<F>(f: F, min: Float, max: Float, n: usize) -> Self
where
F: Fn(Float) -> Float,
{
let delta = (max - min) / n as Float;
let values: Vec<Float> = (0..n)
.map(|i| {
let x = min + (i as Float + 0.5) * delta;
f(x)
})
.map(|i| f(min + (i as Float + 0.5) * delta))
.collect();
Self::new_with_bounds(values, min, max)
}
@ -64,74 +46,92 @@ impl PiecewiseConstant1D {
}
let func_integral = cdf[n];
if func_integral > 0.0 {
for c in &mut cdf {
*c /= func_integral;
}
}
// Convert to boxed slices (no more reallocation possible)
let func: Box<[Float]> = f.into_boxed_slice();
let cdf: Box<[Float]> = cdf.into_boxed_slice();
let device = DevicePiecewiseConstant1D {
func: func.as_ptr().into(),
cdf: cdf.as_ptr().into(),
min,
max,
n: n as u32,
func_integral,
};
Self { func, cdf, device }
Self { func: f, cdf, min, max }
}
// Accessors
pub fn min(&self) -> Float {
self.device.min
}
pub fn max(&self) -> Float {
self.device.max
}
pub fn n(&self) -> usize {
self.device.n as usize
}
pub fn n(&self) -> usize { self.func.len() }
pub fn func(&self) -> &[Float] { &self.func }
pub fn cdf(&self) -> &[Float] { &self.cdf }
pub fn integral(&self) -> Float {
self.device.func_integral
// func_integral is the un-normalized sum. After normalization cdf[n] == 1.0,
// so we reconstruct from the last CDF entry before normalization.
// But since we normalized in-place, we need to store it. Let's compute it.
let n = self.func.len();
let delta = (self.max - self.min) / n as Float;
self.func.iter().sum::<Float>() * delta
}
pub fn func(&self) -> &[Float] {
&self.func
/// Host-side sampling (for scene construction, not rendering).
/// During rendering, use the device struct via arena-uploaded Ptrs.
pub fn sample_host(&self, u: Float) -> (Float, Float, usize) {
let n = self.func.len();
let offset = self.find_interval_host(u);
let cdf_offset = self.cdf[offset];
let cdf_next = self.cdf[offset + 1];
let du = if cdf_next - cdf_offset > 0.0 {
(u - cdf_offset) / (cdf_next - cdf_offset)
} else {
0.0
};
let delta = (self.max - self.min) / n as Float;
let x = self.min + (offset as Float + du) * delta;
let func_integral = self.integral();
let pdf = if func_integral > 0.0 {
self.func[offset] / func_integral
} else {
0.0
};
(x, pdf, offset)
}
pub fn cdf(&self) -> &[Float] {
&self.cdf
fn find_interval_host(&self, u: Float) -> usize {
let n = self.func.len();
let mut size = n;
let mut first = 0usize;
while size > 0 {
let half = size >> 1;
let middle = first + half;
if self.cdf[middle] <= u {
first = middle + 1;
size -= half + 1;
} else {
size = half;
}
}
first.saturating_sub(1).min(n - 1)
}
}
impl std::ops::Deref for PiecewiseConstant1D {
type Target = DevicePiecewiseConstant1D;
fn deref(&self) -> &Self::Target {
&self.device
}
#[derive(DeviceRepr)]
#[device(name = "DevicePiecewiseConstant1D")]
pub struct PiecewiseConstant1D {
pub func: Vec<Float>,
pub cdf: Vec<Float>,
pub min: Float,
pub max: Float,
pub n: u32,
#[device(expr = "self.integral()")]
pub func_integral: Float,
}
#[derive(Debug, Clone)]
#[derive(Clone, Debug, DeviceRepr)]
#[device(name = "DevicePiecewiseConstant2D")]
pub struct PiecewiseConstant2D {
pub conditionals: Vec<PiecewiseConstant1D>,
#[device(flatten)]
pub marginal: PiecewiseConstant1D,
pub conditional_devices: Box<[DevicePiecewiseConstant1D]>,
pub device: DevicePiecewiseConstant2D,
pub n_u: u32,
pub n_v: u32,
}
impl std::ops::Deref for PiecewiseConstant2D {
type Target = DevicePiecewiseConstant2D;
fn deref(&self) -> &Self::Target {
&self.device
}
}
impl PiecewiseConstant2D {
pub fn new(data: &Array2D<Float>) -> Self {
@ -141,8 +141,8 @@ impl PiecewiseConstant2D {
pub fn new_with_bounds(data: &Array2D<Float>, domain: Bounds2f) -> Self {
Self::from_slice(
data.as_slice(),
data.x_size() as usize,
data.y_size() as usize,
data.x_size(),
data.y_size(),
domain,
)
}
@ -154,36 +154,23 @@ impl PiecewiseConstant2D {
let mut marginal_func = Vec::with_capacity(n_v);
for v in 0..n_v {
let row_start = v * n_u;
let row: Vec<Float> = data[row_start..row_start + n_u].to_vec();
let conditional =
PiecewiseConstant1D::new_with_bounds(row, domain.p_min.x(), domain.p_max.x());
let row = data[v * n_u..(v + 1) * n_u].to_vec();
let conditional = PiecewiseConstant1D::new_with_bounds(
row,
domain.p_min.x(),
domain.p_max.x(),
);
marginal_func.push(conditional.integral());
conditionals.push(conditional);
}
let marginal =
PiecewiseConstant1D::new_with_bounds(marginal_func, domain.p_min.y(), domain.p_max.y());
let marginal = PiecewiseConstant1D::new_with_bounds(
marginal_func,
domain.p_min.y(),
domain.p_max.y(),
);
let conditional_devices: Box<[DevicePiecewiseConstant1D]> = conditionals
.iter()
.map(|c| c.device)
.collect::<Vec<_>>()
.into_boxed_slice();
let device = DevicePiecewiseConstant2D {
conditionals: conditional_devices.as_ptr().into(),
marginal: marginal.device,
n_u: n_u as u32,
n_v: n_v as u32,
};
Self {
conditionals,
marginal,
conditional_devices,
device,
}
Self { conditionals, marginal, n_u, n_v }
}
pub fn from_image(image: &Image) -> Self {
@ -192,12 +179,9 @@ impl PiecewiseConstant2D {
let n_v = res.y() as usize;
let mut data = Vec::with_capacity(n_u * n_v);
for v in 0..n_v {
for u in 0..n_u {
let p = Point2i::new(u as i32, v as i32);
let luminance = image.get_channels(p).average();
data.push(luminance);
data.push(image.get_channels(Point2i::new(u as i32, v as i32)).average());
}
}
@ -217,10 +201,14 @@ struct PiecewiseLinear2DStorage<const N: usize> {
}
pub struct PiecewiseLinear2DHost<const N: usize> {
pub view: PiecewiseLinear2D<N>,
_storage: Arc<PiecewiseLinear2DStorage<N>>,
size: Vector2i,
inv_patch_size: Vector2f,
param_size: [u32; N],
param_strides: [u32; N],
storage: Arc<PiecewiseLinear2DStorage<N>>,
}
impl<const N: usize> PiecewiseLinear2DHost<N> {
pub fn new(
data: &[Float],
@ -354,27 +342,49 @@ impl<const N: usize> PiecewiseLinear2DHost<N> {
}
}
impl<const N: usize> DeviceRepr for PiecewiseLinear2DHost<N> {
type Target = PiecewiseLinear2D<N>;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> PiecewiseLinear2D<N> {
let s = &self.storage;
let (data_ptr, _) = arena.alloc_slice(&s.data);
let (marginal_ptr, _) = arena.alloc_slice(&s.marginal_cdf);
let (conditional_ptr, _) = arena.alloc_slice(&s.conditional_cdf);
let param_ptrs: [Ptr<Float>; N] = std::array::from_fn(|i| {
let (ptr, _) = arena.alloc_slice(&s.param_values[i]);
ptr
});
PiecewiseLinear2D {
size: self.size,
inv_patch_size: self.inv_patch_size,
param_size: self.param_size,
param_strides: self.param_strides,
param_values: param_ptrs,
data: data_ptr,
marginal_cdf: marginal_ptr,
conditional_cdf: conditional_ptr,
}
}
}
#[derive(Debug, Clone)]
pub struct AliasTableHost {
pub view: AliasTable,
_storage: Vec<Bin>,
bins: Vec<Bin>,
}
impl AliasTableHost {
pub fn new(weights: &[Float]) -> Self {
let n = weights.len();
if n == 0 {
return Self {
view: AliasTable {
bins: Ptr::null(),
size: 0,
},
_storage: Vec::new(),
};
return Self { bins: Vec::new() };
}
let sum: f64 = weights.iter().map(|&w| w as f64).sum();
assert!(sum > 0.0, "Sum of weights must be positive");
let mut bins = Vec::with_capacity(n);
for &w in weights {
bins.push(Bin {
@ -384,10 +394,7 @@ impl AliasTableHost {
});
}
struct Outcome {
p_hat: f64,
index: usize,
}
struct Outcome { p_hat: f64, index: usize }
let mut under = Vec::with_capacity(n);
let mut over = Vec::with_capacity(n);
@ -409,17 +416,10 @@ impl AliasTableHost {
bins[un.index].alias = ov.index as u32;
let p_excess = un.p_hat + ov.p_hat - 1.0;
if p_excess < 1.0 {
under.push(Outcome {
p_hat: p_excess,
index: ov.index,
});
under.push(Outcome { p_hat: p_excess, index: ov.index });
} else {
over.push(Outcome {
p_hat: p_excess,
index: ov.index,
});
over.push(Outcome { p_hat: p_excess, index: ov.index });
}
}
@ -427,51 +427,40 @@ impl AliasTableHost {
bins[ov.index].q = 1.0;
bins[ov.index].alias = ov.index as u32;
}
while let Some(un) = under.pop() {
bins[un.index].q = 1.0;
bins[un.index].alias = un.index as u32;
}
let view = AliasTable {
bins: bins.as_ptr().into(),
size: bins.len() as u32,
};
Self {
view,
_storage: bins,
}
Self { bins }
}
pub fn to_device<A: GpuAllocator>(&self, arena: &Arena<A>) -> AliasTable {
if self._storage.is_empty() {
return AliasTable {
bins: Ptr::null(),
size: 0,
};
pub fn size(&self) -> usize { self.bins.len() }
pub fn is_empty(&self) -> bool { self.bins.is_empty() }
}
impl DeviceRepr for AliasTableHost {
type Target = AliasTable;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> AliasTable {
if self.bins.is_empty() {
return AliasTable { bins: Ptr::null(), size: 0 };
}
let (bins_ptr, _) = arena.alloc_slice(&self._storage);
let (bins_ptr, _) = arena.alloc_slice(&self.bins);
AliasTable {
bins: bins_ptr,
size: self._storage.len() as u32,
size: self.bins.len() as u32,
}
}
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, DeviceRepr)]
#[device(name = "DeviceSummedAreaTable")]
pub struct SummedAreaTable {
pub device: DeviceSummedAreaTable,
#[device(flatten)]
sum: Array2D<f64>,
}
impl std::ops::Deref for SummedAreaTable {
type Target = DeviceSummedAreaTable;
fn deref(&self) -> &Self::Target {
&self.device
}
}
impl SummedAreaTable {
pub fn new(values: &Array2D<Float>) -> Self {
let width = values.x_size() as i32;
@ -483,46 +472,194 @@ impl SummedAreaTable {
for x in 1..width {
sum[(x, 0)] = values[(x, 0)] as f64 + sum[(x - 1, 0)];
}
for y in 1..height {
sum[(0, y)] = values[(0, y)] as f64 + sum[(0, y - 1)];
}
for y in 1..height {
for x in 1..width {
let term = values[(x, y)] as f64;
let left = sum[(x - 1, y)];
let up = sum[(x, y - 1)];
let diag = sum[(x - 1, y - 1)];
sum[(x, y)] = term + left + up - diag;
sum[(x, y)] = values[(x, y)] as f64
+ sum[(x - 1, y)]
+ sum[(x, y - 1)]
- sum[(x - 1, y - 1)];
}
}
let device = DeviceSummedAreaTable { sum: *sum };
Self { device, sum }
Self { sum }
}
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, DeviceRepr)]
#[device(name = "DeviceWindowedPiecewiseConstant2D")]
pub struct WindowedPiecewiseConstant2D {
pub device: DeviceWindowedPiecewiseConstant2D,
sat: DeviceSummedAreaTable,
#[device(flatten)]
sat: SummedAreaTable,
#[device(flatten)]
func: Array2D<Float>,
}
impl std::ops::Deref for WindowedPiecewiseConstant2D {
type Target = DeviceWindowedPiecewiseConstant2D;
fn deref(&self) -> &Self::Target {
&self.device
}
}
impl WindowedPiecewiseConstant2D {
pub fn new(func: Array2D<Float>) -> Self {
let sat = *SummedAreaTable::new(&func);
let device = DeviceWindowedPiecewiseConstant2D { sat, func: *func };
Self { sat, func, device }
let sat = SummedAreaTable::new(&func);
Self { sat, func }
}
}
struct PiecewiseLinear2DStorage<const N: usize> {
data: Vec<Float>,
marginal_cdf: Vec<Float>,
conditional_cdf: Vec<Float>,
param_values: [Vec<Float>; N],
}
pub struct PiecewiseLinear2DHost<const N: usize> {
size: Vector2i,
inv_patch_size: Vector2f,
param_size: [u32; N],
param_strides: [u32; N],
storage: Arc<PiecewiseLinear2DStorage<N>>,
}
impl<const N: usize> PiecewiseLinear2DHost<N> {
pub fn new(
data: &[Float],
x_size: i32,
y_size: i32,
param_res: [usize; N],
param_values: [&[Float]; N],
normalize: bool,
build_cdf: bool,
) -> Self {
if build_cdf && !normalize {
panic!("PiecewiseLinear2D: build_cdf implies normalize=true");
}
let size = Vector2i::new(x_size, y_size);
let inv_patch_size = Vector2f::new(
1.0 / (x_size - 1) as Float,
1.0 / (y_size - 1) as Float,
);
let mut param_size = [0u32; N];
let mut param_strides = [0u32; N];
let owned_param_values: [Vec<Float>; N] = gpu_array_from_fn(|i| param_values[i].to_vec());
let mut slices: u32 = 1;
for i in (0..N).rev() {
assert!(param_res[i] >= 1, "Parameter resolution must be >= 1");
param_size[i] = param_res[i] as u32;
param_strides[i] = if param_res[i] > 1 { slices } else { 0 };
slices *= param_size[i];
}
let n_values = (x_size * y_size) as usize;
let mut new_data = vec![0.0; slices as usize * n_values];
let mut marginal_cdf = if build_cdf {
vec![0.0; slices as usize * y_size as usize]
} else {
Vec::new()
};
let mut conditional_cdf = if build_cdf {
vec![0.0; slices as usize * n_values]
} else {
Vec::new()
};
let mut data_offset = 0;
for slice in 0..slices as usize {
let slice_offset = slice * n_values;
let current_data = &data[data_offset..data_offset + n_values];
let mut sum = 0.0_f64;
if normalize {
for y in 0..(y_size - 1) {
for x in 0..(x_size - 1) {
let i = (y * x_size + x) as usize;
let v00 = current_data[i] as f64;
let v10 = current_data[i + 1] as f64;
let v01 = current_data[i + x_size as usize] as f64;
let v11 = current_data[i + 1 + x_size as usize] as f64;
sum += 0.25 * (v00 + v10 + v01 + v11);
}
}
}
let normalization = if normalize && sum > 0.0 {
1.0 / sum as Float
} else {
1.0
};
for k in 0..n_values {
new_data[slice_offset + k] = current_data[k] * normalization;
}
if build_cdf {
let marginal_slice_offset = slice * y_size as usize;
for y in 0..y_size as usize {
let mut cdf_sum = 0.0;
let i_base = y * x_size as usize;
conditional_cdf[slice_offset + i_base] = 0.0;
for x in 0..(x_size - 1) as usize {
let i = i_base + x;
cdf_sum += 0.5
* (new_data[slice_offset + i] + new_data[slice_offset + i + 1]);
conditional_cdf[slice_offset + i + 1] = cdf_sum;
}
}
marginal_cdf[marginal_slice_offset] = 0.0;
let mut marginal_sum = 0.0;
for y in 0..(y_size - 1) as usize {
let cdf1 =
conditional_cdf[slice_offset + (y + 1) * x_size as usize - 1];
let cdf2 =
conditional_cdf[slice_offset + (y + 2) * x_size as usize - 1];
marginal_sum += 0.5 * (cdf1 + cdf2);
marginal_cdf[marginal_slice_offset + y + 1] = marginal_sum;
}
}
data_offset += n_values;
}
let storage = Arc::new(PiecewiseLinear2DStorage {
data: new_data,
marginal_cdf,
conditional_cdf,
param_values: owned_param_values,
});
Self {
size,
inv_patch_size,
param_size,
param_strides,
storage,
}
}
}
impl<const N: usize> DeviceRepr for PiecewiseLinear2DHost<N> {
type Target = PiecewiseLinear2D<N>;
fn upload_value<A: GpuAllocator>(&self, arena: &Arena<A>) -> PiecewiseLinear2D<N> {
let s = &self.storage;
let (data_ptr, _) = arena.alloc_slice(&s.data);
let (marginal_ptr, _) = arena.alloc_slice(&s.marginal_cdf);
let (conditional_ptr, _) = arena.alloc_slice(&s.conditional_cdf);
let param_ptrs: [Ptr<Float>; N] = std::array::from_fn(|i| {
let (ptr, _) = arena.alloc_slice(&s.param_values[i]);
ptr
});
PiecewiseLinear2D {
size: self.size,
inv_patch_size: self.inv_patch_size,
param_size: self.param_size,
param_strides: self.param_strides,
param_values: param_ptrs,
data: data_ptr,
marginal_cdf: marginal_ptr,
conditional_cdf: conditional_ptr,
}
}
}