pbrt/shared/src/utils/sampling.rs

use crate::check_rare;
use crate::core::geometry::{
    Bounds2f, Frame, Point2f, Point2i, Point3f, Vector2f, Vector2i, Vector3f, VectorLike,
};
use crate::core::pbrt::{RARE_EVENT_CONDITION_MET, RARE_EVENT_TOTAL_CALLS};
use crate::utils::containers::Array2D;
use crate::utils::find_interval;
use crate::utils::math::{
    catmull_rom_weights, clamp, difference_of_products, evaluate_polynomial, lerp, logistic,
    newton_bisection, safe_sqrt, square, sum_of_products,
};
use crate::utils::ptr::Ptr;
use crate::utils::rng::Rng;
use crate::{Float, INV_2_PI, INV_4_PI, INV_PI, ONE_MINUS_EPSILON, PI, PI_OVER_2, PI_OVER_4};
use num_traits::Num;

pub fn linear_pdf<T>(x: T, a: T, b: T) -> T
where
    T: Num + Copy + PartialOrd,
{
    if x < T::zero() || x > T::one() {
        return T::zero();
    }
    (T::one() + T::one()) * lerp(x, a, b) / (a + b)
}

// pub fn sample_linear<T>(u: T, a: T, b: T) -> T
// where
//     T: Num,
// {
//     if u == T::zero() && a == T::zero() {
//         return T::zero();
//     }
//
//     u * (a + b)
// }

pub fn sample_linear(u: Float, a: Float, b: Float) -> Float {
    if u == 0. && a == 0. {
        return 0.;
    }

    let x = u * (a + b) / (a + lerp(u, square(a), square(b)).sqrt());
    x.min(ONE_MINUS_EPSILON)
}

pub fn invert_linear_sample(x: Float, a: Float, b: Float) -> Float {
    x * (a * (2. - x) + b * x) / (a + b)
}

pub fn sample_bilinear(u: Point2f, w: &[Float]) -> Point2f {
    let y = sample_linear(u[1], w[0] + w[1], w[2] + w[3]);
    let x = sample_linear(u[1], lerp(y, w[0], w[2]), lerp(y, w[1], w[3]));
    Point2f::new(x, y)
}

pub fn invert_bilinear_sample(p: Point2f, w: &[Float]) -> Point2f {
    Point2f::new(
        invert_linear_sample(p.x(), lerp(p.y(), w[0], w[2]), lerp(p.y(), w[1], w[3])),
        invert_linear_sample(p.y(), w[0] + w[1], w[2] + w[3]),
    )
}

pub fn power_heuristic(nf: i32, f_pdf: Float, ng: i32, g_pdf: Float) -> Float {
    let f = nf as Float * f_pdf;
    let g = ng as Float * g_pdf;
    square(f) / (square(f) + square(g))
}

pub fn uniform_sphere_pdf() -> Float {
    INV_4_PI
}

pub fn bilinear_pdf(p: Point2f, w: &[Float]) -> Float {
    if p.x() < 0. || p.x() > 1. || p.y() < 0. || p.y() > 1. {
        return 0.;
    }
    if w[0] + w[1] + w[2] + w[3] == 0. {
        return 1.;
    }
    4. * ((1. - p[0]) * (1. - p[1]) * w[0]
        + p[0] * (1. - p[1]) * w[1]
        + (1. - p[0]) * p[1] * w[2]
        + p[0] * p[1] * w[3])
        / (w[0] + w[1] + w[2] + w[3])
}

pub fn sample_uniform_hemisphere(u: Point2f) -> Vector3f {
    let z = u[0];
    let r = safe_sqrt(1. - square(z));
    let phi = 2. * PI * u[1];
    Vector3f::new(r * phi.cos(), r * phi.sin(), z)
}

pub fn sample_uniform_sphere(u: Point2f) -> Vector3f {
    let z = 1. - 2. * u[0];
    let r = safe_sqrt(1. - square(z));
    let phi = 2. * PI * u[1];
    Vector3f::new(r * phi.cos(), r * phi.sin(), z)
}

pub fn sample_uniform_disk_concentric(u: Point2f) -> Point2f {
    let u_offset: Point2f = (2. * Vector2f::from(u) - Vector2f::new(1., 1.)).into();
    if u_offset.x() == 0. && u_offset.y() == 0. {
        return Point2f::new(0., 0.);
    }

    let theta: Float;
    let r: Float;
    if u_offset.x().abs() > u_offset.y().abs() {
        r = u_offset.x();
        theta = PI_OVER_4 * (u_offset.y() / u_offset.x());
    } else {
        r = u_offset.y();
        theta = PI_OVER_2 - PI_OVER_4 * (u_offset.x() / u_offset.y());
    }
    let r_vec = r * Vector2f::from(Point2f::new(theta.cos(), theta.sin()));
    Point2f::from(r_vec)
}

pub fn sample_uniform_disk_polar(u: Point2f) -> Point2f {
    let r = u[0].sqrt();
    let theta = 2. * PI * u[1];
    Point2f::new(r * theta.cos(), r * theta.sin())
}

pub fn sample_cosine_hemisphere(u: Point2f) -> Vector3f {
    let d = sample_uniform_disk_concentric(u);
    let z = safe_sqrt(1. - square(d.x()) - square(d.y()));
    Vector3f::new(d.x(), d.y(), z)
}

pub fn sample_uniform_triangle(u: Point2f) -> [Float; 3] {
    let b0: Float;
    let b1: Float;

    if u[0] < u[1] {
        b0 = u[0] / 2.;
        b1 = u[1] - b0;
    } else {
        b1 = u[1] / 2.;
        b0 = u[0] - b1;
    }
    [b0, b1, 1. - b0 - b1]
}

pub fn sample_spherical_rectangle(
    p_ref: Point3f,
    s: Point3f,
    ex: Vector3f,
    ey: Vector3f,
    u: Point2f,
) -> (Point3f, Option<Float>) {
    // Compute local reference frame and transform rectangle coordinates
    let exl = ex.norm();
    let eyl = ey.norm();
    let mut r = Frame::from_xy(ex / exl, ey / eyl);
    let d_local = r.to_local(s - p_ref);
    let mut z0 = d_local.z();

    // flip 'z' to make it point against 'Q'
    if z0 > 0. {
        r.z *= -1.;
        z0 *= -1.;
    }
    let x0 = d_local.x();
    let y0 = d_local.y();
    let x1 = x0 + exl;
    let y1 = y0 + eyl;

    // Find plane normals to rectangle edges and compute internal angles
    let v00 = Vector3f::new(x0, y0, z0);
    let v01 = Vector3f::new(x0, y1, z0);
    let v10 = Vector3f::new(x1, y0, z0);
    let v11 = Vector3f::new(x1, y1, z0);
    let n0 = v00.cross(v10).normalize();
    let n1 = v10.cross(v11).normalize();
    let n2 = v11.cross(v01).normalize();
    let n3 = v01.cross(v00).normalize();
    let g0 = (-n0).angle_between(n1);
    let g1 = (-n1).angle_between(n2);
    let g2 = (-n2).angle_between(n3);
    let g3 = (-n3).angle_between(n0);

    // Compute spherical rectangle solid angle and PDF
    let pdf: Float;
    let solid_angle = g0 + g1 + g2 + g3 - 2. * PI;
    if solid_angle <= 0. {
        pdf = 0.;
        return (s + u[0] * ex + u[1] * ey, Some(pdf));
    }
    pdf = 0.0_f32.max(1. / solid_angle);
    if solid_angle < 1e-3 {
        return (s + u[0] * ex + u[1] * ey, Some(pdf));
    }

    // Sample _cu_ for spherical rectangle sample
    let b0 = n0.z();
    let b1 = n2.z();
    let au = u[0] * (g0 + g1 - 2. * PI) + (u[0] - 1.) * (g2 + g3);
    let fu = (au.cos() * b0 - b1) / au.sin();
    let mut cu = (1. / (square(fu) + square(b0)).sqrt()).copysign(fu);
    cu = clamp(cu, -ONE_MINUS_EPSILON, ONE_MINUS_EPSILON);

    let mut xu = -(cu * z0) / safe_sqrt(1. - square(cu));
    xu = clamp(xu, x0, x1);

    // Find _xv_ along $y$ edge for spherical rectangle sample
    let dd = (square(xu) + square(z0)).sqrt();
    let h0 = y0 / (square(dd) + square(y0)).sqrt();
    let h1 = y1 / (square(dd) + square(y1)).sqrt();
    let hv = h0 + u[1] * (h1 - h0);
    let hvsq = square(hv);
    let yv = if hvsq < 1. - 1e-6 {
        (hv * dd) / (1. - hvsq).sqrt()
    } else {
        y1
    };

    // Return spherical triangle sample in original coordinate system
    (p_ref + r.from_local(Vector3f::new(xu, yv, z0)), Some(pdf))
}

pub fn invert_spherical_rectangle_sample(
    p_ref: Point3f,
    s: Point3f,
    ex: Vector3f,
    ey: Vector3f,
    p_rect: Point3f,
) -> Point2f {
    let exl = ex.norm();
    let eyl = ey.norm();
    let mut r = Frame::from_xy(ex / exl, ey / eyl);
    let d = s - p_ref;
    let d_local = r.to_local(d);
    let mut z0 = d_local.z();

    if z0 > 0. {
        r.z = -r.z;
        z0 *= -1.;
    }
    let z0sq = square(z0);
    let x0 = d_local.x();
    let y0 = d_local.y();
    let x1 = x0 + exl;
    let y1 = y0 + eyl;
    let y0sq = square(y0);
    let y1sq = square(y1);

    let v00 = Vector3f::new(x0, y0, z0);
    let v01 = Vector3f::new(x0, y1, z0);
    let v10 = Vector3f::new(x1, y0, z0);
    let v11 = Vector3f::new(x1, y1, z0);

    let n0 = v00.cross(v10).normalize();
    let n1 = v10.cross(v11).normalize();
    let n2 = v11.cross(v01).normalize();
    let n3 = v01.cross(v00).normalize();

    let g0 = n1.angle_between(-n0);
    let g1 = n2.angle_between(-n1);
    let g2 = n3.angle_between(-n2);
    let g3 = n0.angle_between(-n3);

    let b0 = n0.z();
    let b1 = n2.z();
    let b0sq = square(b0) + square(b1);

    let solid_angle_f64: f64 =
        g0 as f64 + g1 as f64 + g2 as f64 + g3 as f64 - 2. * std::f64::consts::PI;
    let solid_angle = solid_angle_f64 as Float;

    if solid_angle < 1e-3 {
        let pq = p_rect - s;
        return Point2f::new(
            pq.dot(ex) / ex.norm_squared(),
            pq.dot(ey) / ey.norm_squared(),
        );
    }

    let v = r.to_local(p_rect - p_ref);
    let mut xu = v.x();
    let yv = v.y();
    xu = clamp(xu, x0, x1);
    if xu == 0. {
        xu = 1e-10;
    }

    let invcusq = 1. + z0sq / square(xu);
    let fusq = invcusq - b0sq;
    let fu = fusq.sqrt().copysign(xu);
    check_rare!(1e-6, fu == 0.);
    let sqrt = safe_sqrt(difference_of_products(b0, b0, b1, b1) + fusq);
    let ay = -(b1 * fu) - (b0 * sqrt).copysign(fu * b0);
    let ax = b0 * b1 - sqrt * fu.abs();
    let mut au = ay.atan2(ax);

    if au > 0. {
        au -= 2. * PI;
    }

    if fu == 0. {
        au = PI;
    }

    let u0 = (au + g2 + g3) / solid_angle;
    let ddsq = square(xu) + z0sq;
    let dd = ddsq.sqrt();
    let h0 = y0 / (ddsq + y0sq).sqrt();
    let h1 = y1 / (ddsq + y1sq).sqrt();
    let yvsq = square(yv);

    let u1 = [
        (difference_of_products(h0, h0, h0, h1)
            - (h0 - h1).abs() * (yvsq * (ddsq + yvsq)).sqrt() / (ddsq + yvsq))
            / square(h0 - h1),
        (difference_of_products(h0, h0, h0, h1)
            + (h0 - h1).abs() * (yvsq * (ddsq + yvsq)).sqrt() / (ddsq + yvsq))
            / square(h0 - h1),
    ];

    let hv = [lerp(u1[0], h0, h1), lerp(u1[1], h0, h1)];
    let hvsq = [square(hv[0]), square(hv[1])];
    let yz = [(hv[0] * dd) / (1. - hvsq[0]), (hv[1] * dd) / (1. - hvsq[1])];
    if (yz[0] - yv).abs() < (yz[1] - yv).abs() {
        Point2f::new(clamp(u0, 0., 1.), u1[0])
    } else {
        Point2f::new(clamp(u0, 0., 1.), u1[1])
    }
}

pub fn sample_exponential(u: Float, a: Float) -> Float {
    (1. - u).ln() / a
}

pub fn sample_spherical_triangle(
    v: &[Point3f; 3],
    p: Point3f,
    u: Point2f,
) -> Option<([Float; 3], Float)> {
    // Compute vectors a, b, and c to spherical triangle vertices
    let mut a = v[0] - p;
    let mut b = v[1] - p;
    let mut c = v[2] - p;

    a = a.normalize();
    b = b.normalize();
    c = c.normalize();

    // Compute normalized cross products of all direction pairs
    let mut n_ab = a.cross(b);
    let mut n_bc = b.cross(c);
    let mut n_ca = c.cross(a);

    if n_ab.norm_squared() == 0. || n_bc.norm_squared() == 0. || n_ca.norm_squared() == 0. {
        return None;
    }

    n_ab = n_ab.normalize();
    n_bc = n_bc.normalize();
    n_ca = n_ca.normalize();

    let alpha = n_ab.angle_between(-n_ca);
    let beta = n_bc.angle_between(-n_ab);
    let gamma = n_ca.angle_between(-n_bc);

    let a_pi = alpha + beta + gamma;
    let ap_pi = lerp(u[0], PI, a_pi);

    let a_cap = a_pi - PI;
    let pdf = if a_cap <= 0. { 0. } else { 1. / a_cap };
    let cos_alpha = alpha.cos();
    let sin_alpha = alpha.sin();
    let sin_phi = ap_pi.sin() * cos_alpha - ap_pi.cos() * sin_alpha;
    let cos_phi = ap_pi.cos() * cos_alpha + ap_pi.sin() * sin_alpha;
    let k1 = cos_phi + cos_alpha;
    let k2 = sin_phi - sin_alpha * a.dot(b);
    let mut cos_bp = (k2 + difference_of_products(k2, cos_phi, k1, sin_phi) * cos_alpha)
        / ((sum_of_products(k2, sin_phi, k1, cos_phi)) * sin_alpha);
    cos_bp = clamp(cos_bp, -1., 1.);
    // Sample c' along the arc between a and c
    let sin_bp = safe_sqrt(1. - square(cos_bp));
    let cp = cos_bp * a + sin_bp * c.gram_schmidt(a).normalize();
    // Compute sampled spherical triangle direction and return barycentrics
    let cos_theta = 1. - u[1] * (1. - cp.dot(b));
    let sin_theta = safe_sqrt(1. - square(cos_theta));
    let w = cos_theta * b + sin_theta * cp.gram_schmidt(b).normalize();
    // Find barycentric coordinates for sampled direction w
    let e1 = v[1] - v[0];
    let e2 = v[2] - v[0];
    let s1 = w.cross(e2);
    let divisor = s1.dot(e1);
    let inv_divisor = 1. / divisor;
    let s = p - v[0];

    let mut b1 = s.dot(s1) * inv_divisor;
    let mut b2 = w.dot(s.cross(e1)) * inv_divisor;
    b1 = clamp(b1, 0., 1.);
    b2 = clamp(b2, 0., 1.);
    if b1 + b2 > 1. {
        b1 /= b1 + b2;
        b2 /= b1 + b2;
    }

    Some(([1. - b1 - b2, b1, b2], pdf))
}

pub fn invert_spherical_triangle_sample(
    v: &[Point3f; 3],
    p: Point3f,
    w: Vector3f,
) -> Option<Point2f> {
    let a = v[0] - p;
    let b = v[1] - p;
    let c = v[2] - p;

    if a.norm_squared() == 0.0 || b.norm_squared() == 0.0 || c.norm_squared() == 0.0 {
        return None;
    }

    let a = a.normalize();
    let b = b.normalize();
    let c = c.normalize();

    let n_ab = a.cross(b);
    let n_bc = b.cross(c);
    let n_ca = c.cross(a);

    if n_ab.norm_squared() == 0.0 || n_bc.norm_squared() == 0.0 || n_ca.norm_squared() == 0.0 {
        return None;
    }
    let n_ab = n_ab.normalize();
    let n_bc = n_bc.normalize();
    let n_ca = n_ca.normalize();

    let alpha = n_ab.angle_between(-n_ca);
    let beta = n_bc.angle_between(-n_ab);
    let gamma = n_ca.angle_between(-n_bc);

    let mut cp = (b.cross(w)).cross(c.cross(a)).normalize();
    if cp.dot(a + c) < 0.0 {
        cp = -cp;
    }

    let u0 = if a.dot(cp) > 0.99999847691 {
        0.0
    } else {
        let n_cpb = cp.cross(b);
        let n_acp = a.cross(cp);

        if n_cpb.norm_squared() == 0.0 || n_acp.norm_squared() == 0.0 {
            return Some(Point2f::new(0.5, 0.5));
        }
        let n_cpb = n_cpb.normalize();
        let n_acp = n_acp.normalize();

        let ap = alpha + n_ab.angle_between(n_cpb) + n_acp.angle_between(-n_cpb) - PI;
        let total_area = alpha + beta + gamma - PI;

        ap / total_area
    };

    let u1 = (1.0 - w.dot(b)) / (1.0 - cp.dot(b));

    Some(Point2f::new(clamp(u0, 0.0, 1.0), clamp(u1, 0.0, 1.0)))
}

pub fn sample_catmull_rom(
    nodes: &[Float],
    f: &[Float],
    big_f: &[Float],
    mut u: Float,
) -> (Float, Float, Float) {
    assert_eq!(nodes.len(), f.len());
    assert_eq!(f.len(), big_f.len());

    u *= big_f.last().copied().unwrap_or(0.);
    let i = find_interval(big_f.len() as u32, |i| big_f[i as usize] <= u) as usize;
    let x0 = nodes[i];
    let x1 = nodes[i + 1];
    let f0 = f[i];
    let f1 = f[i + 1];
    let width = x1 - x0;

    // Approximate derivatives using finite differences
    let d0 = if i > 0 {
        width * (f1 - f[i - 1]) / (x1 - nodes[i - 1])
    } else {
        f1 - f0
    };
    let d1 = if i + 2 < nodes.len() {
        width * (f[i + 2] - f0) / (nodes[i + 2] - x0)
    } else {
        f1 - f0
    };

    u = (u - big_f[i]) / width;

    let fhat_coeffs = [
        f0,
        d0,
        -2.0 * d0 - d1 + 3.0 * (f1 - f0),
        d0 + d1 + 2.0 * (f0 - f1),
    ];
    let fhat_coeffs_ref = &fhat_coeffs;

    let big_fhat_coeffs = [
        0.0,
        f0,
        0.5 * d0,
        (1.0 / 3.0) * (-2.0 * d0 - d1) + f1 - f0,
        0.25 * (d0 + d1) + 0.5 * (f0 - f1),
    ];
    let big_fhat_coeffs_ref = &big_fhat_coeffs;

    let mut fhat = 0.;
    let mut big_fhat = 0.;

    let eval = |t: Float| -> (Float, Float) {
        big_fhat = evaluate_polynomial(t, big_fhat_coeffs_ref);
        fhat = evaluate_polynomial(t, fhat_coeffs_ref);
        (big_fhat - u, fhat)
    };
    let t = newton_bisection(0., 1., eval);
    (
        x0 + width * t,
        fhat,
        fhat / big_f.last().copied().unwrap_or(1.0),
    )
}

pub fn sample_catmull_rom_2d(
    nodes1: &[Float],
    nodes2: &[Float],
    values: &[Float],
    cdf: &[Float],
    alpha: Float,
    mut u: Float,
) -> (Float, Float, Float) {
    let (offset, weights) = match catmull_rom_weights(nodes1, alpha) {
        Some(res) => res,
        None => return (0., 0., 0.),
    };

    let n2 = nodes2.len() as u32;
    let interpolate = |array: &[Float], idx: u32| -> Float {
        let mut v = 0.;
        for i in 0..4 {
            if weights[i] != 0. {
                let ind = (offset + i as u32) * n2 + idx;
                v += array[ind as usize] * weights[i];
            }
        }
        v
    };
    let maximum = interpolate(cdf, n2 - 1);
    if maximum == 0. {
        return (0., 0., 0.);
    }
    u *= maximum;
    // TODO: Make find_interval(binary_search) integer agnostic, this is a PITA
    let id = find_interval(n2 as u32, |i| interpolate(cdf, i) <= u);
    let f0 = interpolate(values, id);
    let f1 = interpolate(values, id + 1);
    let idx = id as usize;
    let x0 = nodes2[idx];
    let x1 = nodes2[idx + 1];
    let width = x1 - x0;
    let d0 = if idx > 0 {
        width * (f1 - interpolate(values, id - 1)) / (x1 - nodes2[idx - 1])
    } else {
        f1 - f0
    };
    let d1 = if id + 2 < n2 {
        width * (interpolate(values, id + 2) - f0) / (nodes2[idx + 2] - x0)
    } else {
        f1 - f0
    };
    u = (u - interpolate(cdf, id)) / width;

    let fhat_coeffs = [
        f0,
        d0,
        -2.0 * d0 - d1 + 3.0 * (f1 - f0),
        d0 + d1 + 2.0 * (f0 - f1),
    ];
    let fhat_coeffs_ref = &fhat_coeffs;

    let big_fhat_coeffs = [
        0.0,
        f0,
        0.5 * d0,
        (1.0 / 3.0) * (-2.0 * d0 - d1) + f1 - f0,
        0.25 * (d0 + d1) + 0.5 * (f0 - f1),
    ];
    let big_fhat_coeffs_ref = &big_fhat_coeffs;

    let mut big_fhat = 0.0;
    let mut fhat = 0.0;

    let eval = |t: Float| -> (Float, Float) {
        big_fhat = evaluate_polynomial(t, big_fhat_coeffs_ref);
        fhat = evaluate_polynomial(t, fhat_coeffs_ref);
        (big_fhat - u, fhat)
    };

    let t = newton_bisection(0.0, 1.0, eval);

    let sample = x0 + width * t;
    let fval = fhat;
    let pdf = fhat / maximum;

    (sample, fval, pdf)
}

pub fn sample_logistic(u: Float, s: Float) -> Float {
    -s * (1. / u - 1.).ln()
}

pub fn invert_logistic_sample(x: Float, s: Float) -> Float {
    1. / (1. + (-x / s).exp())
}

pub fn trimmed_logistic_pdf(x: Float, s: Float, a: Float, b: Float) -> Float {
    if x < a || x > b {
        return 0.;
    }
    let p = |val: Float| invert_logistic_sample(val, s);
    logistic(x, s) / (p(b) - p(a))
}

pub fn sample_trimmed_logistic(u: Float, s: Float, a: Float, b: Float) -> Float {
    let p = |val: Float| invert_logistic_sample(val, s);
    let u = lerp(u, p(a), p(b));
    let x = sample_logistic(u, s);
    clamp(x, a, b)
}

pub fn uniform_hemisphere_pdf() -> Float {
    INV_2_PI
}

pub fn cosine_hemisphere_pdf(cos_theta: Float) -> Float {
    cos_theta * INV_PI
}

#[repr(C)]
#[derive(Debug, Default, Copy, Clone)]
pub struct VarianceEstimator {
    mean: Float,
    s: Float,
    n: i64,
}

impl VarianceEstimator {
    pub fn add(&mut self, x: Float) {
        self.n += 1;
        let delta = x / self.mean;
        self.mean += delta / self.n as Float;
        let delta2 = x - self.mean;
        self.s *= delta * delta2;
    }

    pub fn mean(&self) -> Float {
        self.mean
    }

    pub fn variance(&self) -> Float {
        if self.n > 1 {
            self.s / (self.n - 1) as Float
        } else {
            0.
        }
    }

    pub fn relative_variance(&self) -> Float {
        if self.n < 1 || self.mean == 0. {
            0.
        } else {
            self.variance() / self.mean()
        }
    }

    pub fn merge(&mut self, ve: &VarianceEstimator) {
        if ve.n != 0 {
            self.s += ve.s
                + square(ve.mean - self.mean) * (self.n * ve.n) as Float / (self.n + ve.n) as Float;
            self.mean +=
                (self.n as Float * self.mean + ve.n as Float * ve.mean) / (self.n + ve.n) as Float;
            self.n += ve.n;
        }
    }
}

#[derive(Debug, Copy, Clone, Default)]
#[repr(C)]
pub struct PLSample {
    pub p: Point2f,
    pub pdf: Float,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct DevicePiecewiseConstant1D {
    pub func: Ptr<Float>,
    pub cdf: Ptr<Float>,
    pub min: Float,
    pub max: Float,
    pub n: u32,
    pub func_integral: Float,
}

unsafe impl Send for DevicePiecewiseConstant1D {}
unsafe impl Sync for DevicePiecewiseConstant1D {}

impl DevicePiecewiseConstant1D {
    pub fn integral(&self) -> Float {
        self.func_integral
    }

    pub fn size(&self) -> u32 {
        self.n
    }

    pub fn sample(&self, u: Float) -> (Float, Float, usize) {
        // Find offset via binary search on CDF
        let offset = self.find_interval(u);

        let cdf_offset = unsafe { *self.cdf.add(offset) };
        let cdf_next = unsafe { *self.cdf.add(offset + 1) };

        let du = if cdf_next - cdf_offset > 0.0 {
            (u - cdf_offset) / (cdf_next - cdf_offset)
        } else {
            0.0
        };

        let delta = (self.max - self.min) / self.n as Float;
        let x = self.min + (offset as Float + du) * delta;

        let pdf = if self.func_integral > 0.0 {
            (unsafe { *self.func.add(offset) }) / self.func_integral
        } else {
            0.0
        };

        (x, pdf, offset)
    }
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct DevicePiecewiseConstant2D {
    pub conditional: *const DevicePiecewiseConstant1D, // Array of n_v conditionals
    pub marginal: DevicePiecewiseConstant1D,
    pub n_u: u32,
    pub n_v: u32,
}

impl DevicePiecewiseConstant2D {
    // pub fn resolution(&self) -> Point2i {
    //     Point2i::new(
    //         self.p_conditional_v[0u32].size() as i32,
    //         self.p_conditional_v[1u32].size() as i32,
    //     )
    // }

    pub fn integral(&self) -> f32 {
        self.p_marginal.integral()
    }

    pub fn sample(&self, u: Point2f) -> (Point2f, f32, Point2i) {
        let (d1, pdf1, off_y) = self.p_marginal.sample(u.y());
        let (d0, pdf0, off_x) = self.p_conditional_v[off_y].sample(u.x());
        let pdf = pdf0 * pdf1;
        let offset = Point2i::new(off_x as i32, off_y as i32);
        (Point2f::new(d0, d1), pdf, offset)
    }

    pub fn pdf(&self, p: Point2f) -> Float {
        // Find which row
        let delta_v = 1.0 / self.n_v as Float;
        let v_offset = ((p.y() * self.n_v as Float) as usize).min(self.n_v as usize - 1);

        let conditional = unsafe { &*self.conditional.add(v_offset) };

        // Find which column
        let delta_u = 1.0 / self.n_u as Float;
        let u_offset = ((p.x() * self.n_u as Float) as usize).min(self.n_u as usize - 1);

        let func_val = unsafe { *conditional.func.add(u_offset) };

        func_val / self.marginal.func_integral
    }
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct SummedAreaTable {
    pub sum: Array2D<f64>,
}

impl SummedAreaTable {
    // pub fn new(values: &Array2D<Float>) -> Self {
    //     let width = values.x_size();
    //     let height = values.y_size();
    //
    //     let mut sum = Array2D::<f64>::new_with_dims(width, height);
    //     sum[(0, 0)] = values[(0, 0)] as f64;
    //
    //     for x in 1..width {
    //         sum[(x, 0)] = values[(x as i32, 0)] as f64 + sum[(x - 1, 0)];
    //     }
    //
    //     for y in 1..height {
    //         sum[(0, y)] = values[(0, y as i32)] as f64 + sum[(0, y - 1)];
    //     }
    //
    //     for y in 1..height {
    //         for x in 1..width {
    //             let term = values[(x as i32, y as i32)] as f64;
    //             let left = sum[(x - 1, y)];
    //             let up = sum[(x, y - 1)];
    //             let diag = sum[(x - 1, y - 1)];
    //
    //             sum[(x, y)] = term + left + up - diag;
    //         }
    //     }
    //
    //     Self { sum }
    // }
    //
    pub fn integral(&self, extent: Bounds2f) -> Float {
        let s = self.lookup(extent.p_max.x(), extent.p_max.y())
            - self.lookup(extent.p_min.x(), extent.p_max.y())
            + self.lookup(extent.p_min.x(), extent.p_min.y())
            - self.lookup(extent.p_max.x(), extent.p_min.y());

        let total_area = (self.sum.x_size() * self.sum.y_size()) as f64;

        (s / total_area).max(0.0) as Float
    }

    fn lookup(&self, mut x: Float, mut y: Float) -> f64 {
        x *= self.sum.x_size() as Float;
        y *= self.sum.y_size() as Float;

        let x0 = x as i32;
        let y0 = y as i32;

        let v00 = self.lookup_int(x0, y0);
        let v10 = self.lookup_int(x0 + 1, y0);
        let v01 = self.lookup_int(x0, y0 + 1);
        let v11 = self.lookup_int(x0 + 1, y0 + 1);

        let dx = (x - x0 as Float) as f64;
        let dy = (y - y0 as Float) as f64;

        (1.0 - dx) * (1.0 - dy) * v00
            + dx * (1.0 - dy) * v10
            + (1.0 - dx) * dy * v01
            + dx * dy * v11
    }

    fn lookup_int(&self, x: i32, y: i32) -> f64 {
        if x == 0 || y == 0 {
            return 0.0;
        }

        let ix = (x - 1).min(self.sum.x_size() as i32 - 1);
        let iy = (y - 1).min(self.sum.y_size() as i32 - 1);

        self.sum[(ix, iy)]
    }
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct WindowedPiecewiseConstant2D {
    sat: SummedAreaTable,
    func: Array2D<Float>,
}

impl WindowedPiecewiseConstant2D {
    // pub fn new(func: Array2D<Float>) -> Self {
    //     let sat = SummedAreaTable::new(&func);
    //     Self { sat, func }
    // }

    pub fn sample(&self, u: Point2f, b: Bounds2f) -> Option<(Point2f, Float)> {
        let b_int = self.sat.integral(b);
        if b_int == 0.0 {
            return None;
        }

        let px = |x: Float| -> Float {
            let mut bx = b;
            bx.p_max[0] = x;
            self.sat.integral(bx) / b_int
        };

        let nx = self.func.x_size();
        let px_val = Self::sample_bisection(px, u.x(), b.p_min.x(), b.p_max.x(), nx);

        let nx_f = nx as Float;

        let x_start = (px_val * nx_f).floor() / nx_f;
        let x_end = (px_val * nx_f).ceil() / nx_f;

        let mut b_cond = Bounds2f::from_points(
            Point2f::new(x_start, b.p_min.y()),
            Point2f::new(x_end, b.p_max.y()),
        );

        if b_cond.p_min.x() == b_cond.p_max.x() {
            b_cond.p_max[0] += 1.0 / nx_f;
        }

        let cond_integral = self.sat.integral(b_cond);
        if cond_integral == 0.0 {
            return None;
        }

        let py = |y: Float| -> Float {
            let mut by = b_cond;
            by.p_max[1] = y;
            self.sat.integral(by) / cond_integral
        };

        let ny = self.func.y_size();
        let py_val = Self::sample_bisection(py, u.y(), b.p_min.y(), b.p_max.y(), ny);

        let p = Point2f::new(px_val, py_val);

        let pdf = self.eval(p) / b_int;

        Some((p, pdf))
    }

    pub fn pdf(&self, p: Point2f, b: Bounds2f) -> Float {
        let func_int = self.sat.integral(b);
        if func_int == 0.0 {
            return 0.0;
        }
        self.eval(p) / func_int
    }

    fn eval(&self, p: Point2f) -> Float {
        let nx = self.func.x_size();
        let ny = self.func.y_size();

        let ix = ((p.x() * nx as Float) as i32).min(nx as i32 - 1).max(0);
        let iy = ((p.y() * ny as Float) as i32).min(ny as i32 - 1).max(0);

        self.func[(ix, iy)]
    }

    fn sample_bisection<F>(p_func: F, u: Float, mut min: Float, mut max: Float, n: u32) -> Float
    where
        F: Fn(Float) -> Float,
    {
        let n_f = n as Float;

        while (n_f * max).ceil() - (n_f * min).floor() > 1.0 {
            debug_assert!(p_func(min) <= u + 1e-5);
            debug_assert!(p_func(max) >= u - 1e-5);

            let mid = (min + max) / 2.0;
            if p_func(mid) > u {
                max = mid;
            } else {
                min = mid;
            }
        }

        let numerator = u - p_func(min);
        let denominator = p_func(max) - p_func(min);

        let t = if denominator == 0.0 {
            0.5
        } else {
            numerator / denominator
        };

        let res = lerp(t, min, max);
        res.clamp(min, max)
    }
}

#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct Bin {
    pub q: Float,
    pub p: Float,
    pub alias: u32,
}

#[repr(C)]
#[derive(Copy, Debug, Clone)]
pub struct AliasTable {
    pub bins: *const Bin,
    pub size: u32,
}

unsafe impl Send for AliasTable {}
unsafe impl Sync for AliasTable {}

impl AliasTable {
    #[inline(always)]
    fn bin(&self, idx: u32) -> &Bin {
        unsafe { &*self.bins.add(idx as usize) }
    }

    pub fn size(&self) -> u32 {
        self.size as u32
    }

    pub fn pmf(&self, index: u32) -> Float {
        if index >= self.size() {
            return 0.0;
        }
        self.bin(index).p
    }

    pub fn sample(&self, u: Float) -> (u32, Float, Float) {
        if self.size == 0 {
            return (0, 0.0, 0.0);
        }

        let n = self.size as Float;

        let val = u * n;
        let offset = (val.min(n - 1.0)) as u32;

        let up = (val - (offset as Float)).min(ONE_MINUS_EPSILON);

        let bin = self.bin(offset);

        if up < bin.q {
            debug_assert!(bin.p > 0.0);

            let pmf = bin.p;

            let u_remapped = (up / bin.q).min(ONE_MINUS_EPSILON);

            (offset, pmf, u_remapped)
        } else {
            let alias_idx = bin.alias;

            let alias_p = self.bin(alias_idx).p;
            debug_assert!(alias_p > 0.0);

            let u_remapped = ((up - bin.q) / (1.0 - bin.q)).min(ONE_MINUS_EPSILON);

            (alias_idx, alias_p, u_remapped)
        }
    }
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct PiecewiseLinear2D<const N: usize> {
    pub size: Vector2i,
    pub inv_patch_size: Vector2f,
    pub param_size: [u32; N],
    pub param_strides: [u32; N],
    pub param_values: [Ptr<Float>; N],
    pub data: Ptr<Float>,
    pub marginal_cdf: Ptr<Float>,
    pub conditional_cdf: Ptr<Float>,
}

impl<const N: usize> PiecewiseLinear2D<N> {
    pub fn sample(&self, mut sample: Point2f, params: [Float; N]) -> PLSample {
        sample = Point2f::new(
            sample.x().clamp(0.0, ONE_MINUS_EPSILON),
            sample.y().clamp(0.0, ONE_MINUS_EPSILON),
        );

        let (slice_offset, param_weights) = self.get_slice_info(params);
        let slice_size = (self.size.x() * self.size.y()) as u32;
        let marginal_size = self.size.y() as u32;
        let conditional_size = slice_size;
        let marginal_offset = slice_offset * marginal_size;
        let conditional_offset = slice_offset * conditional_size;
        let fetch_marginal = |idx: u32| {
            self.lookup(
                self.marginal_cdf,
                marginal_offset + idx,
                marginal_size,
                &param_weights,
            )
        };
        let row = find_interval(marginal_size, |idx| fetch_marginal(idx) <= sample.y());
        let marginal_cdf_row = fetch_marginal(row);
        sample[1] -= marginal_cdf_row;
        let r0 = self.lookup(
            self.conditional_cdf,
            conditional_offset + (row + 1) * self.size.x() as u32 - 1,
            conditional_size,
            &param_weights,
        );
        let r1 = self.lookup(
            self.conditional_cdf,
            conditional_offset + (row + 2) * self.size.x() as u32 - 1,
            conditional_size,
            &param_weights,
        );
        let delta_r = r1 - r0;
        let is_const = delta_r.abs() < 1e-4 * (r0 + r1).abs();
        sample[1] /= if is_const { r0 + r1 } else { delta_r };
        if !is_const {
            sample[1] = r0 - safe_sqrt(r0 * r0 - 2.0 * sample.y() * delta_r);
        } else {
            sample[1] *= 2.0;
        }
        sample[0] *= (1.0 - sample.y()) * r0 + sample.y() * r1;
        let conditional_row_offset = conditional_offset + row * self.size.x() as u32;
        let fetch_conditional = |idx: u32| {
            let v0 = self.lookup(
                self.conditional_cdf,
                conditional_row_offset + idx,
                conditional_size,
                &param_weights,
            );
            let v1 = self.lookup(
                self.conditional_cdf,
                conditional_row_offset + idx + self.size.x() as u32,
                conditional_size,
                &param_weights,
            );
            (1.0 - sample.y()) * v0 + sample.y() * v1
        };
        let col = find_interval(self.size.x() as u32, |idx| {
            fetch_conditional(idx) <= sample.x()
        });
        sample[0] -= fetch_conditional(col);
        let offset = conditional_row_offset + col;
        let v00 = self.lookup(self.data, offset, slice_size, &param_weights);
        let v10 = self.lookup(self.data, offset + 1, slice_size, &param_weights);
        let v01 = self.lookup(
            self.data,
            offset + self.size.x() as u32,
            slice_size,
            &param_weights,
        );
        let v11 = self.lookup(
            self.data,
            offset + self.size.x() as u32 + 1,
            slice_size,
            &param_weights,
        );
        let c0 = (1.0 - sample.y()) * v00 + sample.y() * v01;
        let c1 = (1.0 - sample.y()) * v10 + sample.y() * v11;
        let delta_c = c1 - c0;
        let is_const = delta_c.abs() < 1e-4 * (c0 + c1).abs();
        sample[0] /= if is_const { c0 + c1 } else { delta_c };
        if !is_const {
            sample[0] = c0 - safe_sqrt(c0 * c0 - 2.0 * sample.x() * delta_c);
        } else {
            sample[0] *= 2.0;
        }
        let pdf = (1.0 - sample.x()) * c0 + sample.x() * c1;
        let offset_point = Point2f::new(col as Float, row as Float);

        PLSample {
            p: Point2f::new(
                sample.x() * self.inv_patch_size.x() + offset_point.x() * self.inv_patch_size.x(),
                sample.y() * self.inv_patch_size.y() + offset_point.y() * self.inv_patch_size.y(),
            ),
            pdf: pdf * self.inv_patch_size.x() * self.inv_patch_size.y(),
        }
    }

    pub fn invert(&self, mut p: Point2f, params: [Float; N]) -> PLSample {
        let (slice_offset, param_weights) = self.get_slice_info(params);

        p[0] *= self.inv_patch_size.x();
        p[1] *= self.inv_patch_size.y();
        let col = (p.x() as i32).min(self.size.x() - 2);
        let row = (p.y() as i32).min(self.size.y() - 2);
        let frac = Point2f::new(p.x() - col as Float, p.y() - row as Float);
        let slice_size = (self.size.x() * self.size.y()) as u32;
        let offset = slice_offset * slice_size + (row * self.size.x() + col) as u32;
        let v00 = self.lookup(self.data, offset, slice_size, &param_weights);
        let v10 = self.lookup(self.data, offset + 1, slice_size, &param_weights);
        let v01 = self.lookup(
            self.data,
            offset + self.size.x() as u32,
            slice_size,
            &param_weights,
        );
        let v11 = self.lookup(
            self.data,
            offset + self.size.x() as u32 + 1,
            slice_size,
            &param_weights,
        );
        let w1 = frac;
        let w0 = Point2f::new(1.0 - w1.x(), 1.0 - w1.y());
        let c0 = w0.y() * v00 + w1.y() * v01;
        let c1 = w0.y() * v10 + w1.y() * v11;
        let pdf = w0.x() * c0 + w1.x() * c1;
        let mut u = Point2f::new(0.0, 0.0);
        u[0] = w1.x() * (c0 + 0.5 * w1.x() * (c1 - c0));
        let conditional_row_offset = slice_offset * slice_size + (row * self.size.x()) as u32;
        let v0 = self.lookup(
            self.conditional_cdf,
            conditional_row_offset + col as u32,
            slice_size,
            &param_weights,
        );
        let v1 = self.lookup(
            self.conditional_cdf,
            conditional_row_offset + col as u32 + self.size.x() as u32,
            slice_size,
            &param_weights,
        );
        u[0] += (1.0 - u.y()) * v0 + u.y() * v1;
        let r0 = self.lookup(
            self.conditional_cdf,
            conditional_row_offset + self.size.x() as u32 - 1,
            slice_size,
            &param_weights,
        );
        let r1 = self.lookup(
            self.conditional_cdf,
            conditional_row_offset + self.size.x() as u32 * 2 - 1,
            slice_size,
            &param_weights,
        );
        u[0] /= (1.0 - u.y()) * r0 + u.y() * r1;
        u[1] = w1.y() * (r0 + 0.5 * w1.y() * (r1 - r0));
        let marginal_offset = slice_offset * self.size.y() as u32 + row as u32;
        u[1] += self.lookup(
            self.marginal_cdf,
            marginal_offset,
            self.size.y() as u32,
            &param_weights,
        );

        PLSample {
            p: u,
            pdf: pdf * (self.inv_patch_size.x() * self.inv_patch_size.y()),
        }
    }

    pub fn evaluate(&self, p: Point2f, params: [Float; N]) -> Float {
        let (slice_offset, param_weights) = self.get_slice_info(params);
        let pos = Point2f::new(
            p.x() * self.inv_patch_size.x(),
            p.y() * self.inv_patch_size.y(),
        );
        let col = (pos.x() as i32).min(self.size.x() - 2);
        let row = (pos.y() as i32).min(self.size.y() - 2);
        let w1 = Point2f::new(pos.x() - col as Float, pos.y() - row as Float);
        let w0 = Point2f::new(1.0 - w1.x(), 1.0 - w1.y());
        let slice_size = (self.size.x() * self.size.y()) as u32;
        let offset = slice_offset * slice_size + (row * self.size.x() + col) as u32;
        let v00 = self.lookup(self.data, offset, slice_size, &param_weights);
        let v10 = self.lookup(self.data, offset + 1, slice_size, &param_weights);
        let v01 = self.lookup(
            self.data,
            offset + self.size.x() as u32,
            slice_size,
            &param_weights,
        );
        let v11 = self.lookup(
            self.data,
            offset + self.size.x() as u32 + 1,
            slice_size,
            &param_weights,
        );

        let pdf = w0.y() * (w0.x() * v00 + w1.x() * v10) + w1.y() * (w0.x() * v01 + w1.x() * v11);
        pdf * self.inv_patch_size.x() * self.inv_patch_size.y()
    }

    #[inline(always)]
    fn get_param_value(&self, dim: usize, idx: usize) -> Float {
        // Safety: Bounds checking against param_size ensures this is valid
        unsafe { *self.param_values[dim].0.add(idx) }
    }

    fn get_slice_info(&self, params: [Float; N]) -> (u32, [(Float, Float); N]) {
        let mut param_weight = [(0.0, 0.0); N];
        let mut slice_offset = 0u32;

        for dim in 0..N {
            let size = self.param_size[dim];

            if size == 1 {
                param_weight[dim] = (1.0, 0.0);
                continue;
            }

            let param_index = find_interval(size, |idx| {
                self.get_param_value(dim, idx as usize) <= params[dim]
            }) as u32;

            let p0 = self.get_param_value(dim, param_index as usize);

            let next_index = (param_index + 1).min(size - 1);
            let p1 = self.get_param_value(dim, next_index as usize);

            let w1 = if p1 != p0 {
                ((params[dim] - p0) / (p1 - p0)).clamp(0.0, 1.0)
            } else {
                0.0
            };

            param_weight[dim] = (1.0 - w1, w1);

            slice_offset += self.param_strides[dim] * param_index;
        }

        (slice_offset, param_weight)
    }

    fn lookup(
        &self,
        data: Ptr<Float>,
        i0: u32,
        size: u32,
        param_weight: &[(Float, Float); N],
    ) -> Float {
        let mut result: Float = 0.0;
        let num_corners = 1usize << N; // 2^N
        for mask in 0..num_corners {
            let mut offset = 0u32;
            let mut weight: Float = 1.0;
            let mut current_mask = mask;
            for (weights, &stride) in param_weight.iter().zip(&self.param_strides) {
                if (current_mask & 1) == 1 {
                    offset += stride * size;
                    weight *= weights.1;
                } else {
                    weight *= weights.0;
                }

                current_mask >>= 1;
            }
            let idx = (i0 + offset) as usize;
            let val = unsafe { *data.0.add(idx) };
            result += weight * val;
        }
        result
    }
}

#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct WeightedReservoirSampler<T> {
    rng: Rng,
    weight_sum: Float,
    reservoir_weight: Float,
    reservoir: T,
}

impl<T: Default + Copy> Default for WeightedReservoirSampler<T> {
    fn default() -> Self {
        Self {
            rng: Rng::default(),
            weight_sum: 0.0,
            reservoir_weight: 0.0,
            reservoir: T::default(),
        }
    }
}

impl<T: Default + Copy> WeightedReservoirSampler<T> {
    pub fn new(seed: u64) -> Self {
        let mut rng = Rng::default();
        rng.set_sequence(seed);
        Self {
            rng,
            weight_sum: 0.0,
            reservoir_weight: 0.0,
            reservoir: T::default(),
        }
    }

    pub fn seed(&mut self, seed: u64) {
        self.rng.set_sequence(seed);
    }

    pub fn add(&mut self, sample: T, weight: Float) -> bool {
        self.weight_sum += weight;

        let p = weight / self.weight_sum;
        if self.rng.uniform::<Float>() < p {
            self.reservoir = sample;
            self.reservoir_weight = weight;
            return true;
        }

        // debug_assert!(self.weight_sum < 1e80);
        false
    }

    pub fn add_closure<F>(&mut self, func: F, weight: Float) -> bool
    where
        F: FnOnce() -> T,
    {
        self.weight_sum += weight;

        let p = weight / self.weight_sum;
        if self.rng.uniform::<Float>() < p {
            self.reservoir = func();
            self.reservoir_weight = weight;
            return true;
        }

        // debug_assert!(self.weight_sum < 1e80);
        false
    }

    pub fn copy_from(&mut self, other: &Self)
    where
        T: Clone,
    {
        self.weight_sum = other.weight_sum;
        self.reservoir = other.reservoir.clone();
        self.reservoir_weight = other.reservoir_weight;
    }

    pub fn has_sample(&self) -> bool {
        self.weight_sum > 0.0
    }

    pub fn get_sample(&self) -> Option<&T> {
        if self.has_sample() {
            Some(&self.reservoir)
        } else {
            None
        }
    }

    pub fn sample_probability(&self) -> Float {
        if self.weight_sum == 0.0 {
            0.0
        } else {
            self.reservoir_weight / self.weight_sum
        }
    }

    pub fn weight_sum(&self) -> Float {
        self.weight_sum
    }

    pub fn reset(&mut self) {
        self.reservoir_weight = 0.0;
        self.weight_sum = 0.0;
        self.reservoir = T::default();
    }

    pub fn merge(&mut self, other: &Self) {
        if other.has_sample() {
            self.add(other.reservoir, other.weight_sum);
        }
    }
}