Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions devices/rtx/device/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,9 @@ set(SOURCES
spatial_field/StructuredRectilinearSampler.cpp
spatial_field/StructuredRegularSampler.cpp
spatial_field/space_skipping/UniformGrid.cu
spatial_field/space_skipping/UniformGridCustom.cu
spatial_field/CustomFieldSampler.cpp
spatial_field/CustomField.cpp
spatial_field/RegisterCustomFields.cpp

surface/Surface.cpp
Expand Down
24 changes: 17 additions & 7 deletions devices/rtx/device/gpu/sbt.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,24 @@ enum class SurfaceShaderEntryPoints

enum class SpatialFieldSamplerEntryPoints
{
Init = 0,
Sample,
// Heavy Woodcock-loop bodies live as per-variant direct-callables in each
// sampler PTX module. Renderer wrappers in volumeIntegration.h compute
// (samplerCallableIndex + offset) and invoke via optixDirectCall.
SampleDistance,
// sampler PTX module. Present for every field family. Renderer wrappers in
// volumeIntegration.h compute (samplerCallableIndex + offset) and invoke via
// optixDirectCall.
SampleDistance = 0,
RatioTrackTransmittance,
RayMarchVolume,
Count
Count, // built-in families reserve this many callable slots

// Custom-field-only hooks. Built-in families sample inline — their concrete
// state type resolves sampleValue/sampleNormal by ADL inside the Woodcock
// body — so they never dispatch these through the SBT. Custom fields route
// back through the SBT (callable-in-callable) because the user sampler is
// compiled into these callables.
Init = Count,
SampleValue,
SampleNormal,
CustomCount // custom field reserves this many callable slots
};

enum class SbtCallableEntryPoints : uint32_t
Expand Down Expand Up @@ -96,7 +105,8 @@ enum class SbtCallableEntryPoints : uint32_t
// based on CustomFieldType in the field data
SpatialFieldSamplerCustom = SpatialFieldSamplerNvdbRectilinearFloat
+ int(SpatialFieldSamplerEntryPoints::Count),
Last = SpatialFieldSamplerCustom + int(SpatialFieldSamplerEntryPoints::Count),
Last = SpatialFieldSamplerCustom
+ int(SpatialFieldSamplerEntryPoints::CustomCount),
};

} // namespace visrtx
73 changes: 34 additions & 39 deletions devices/rtx/device/gpu/volumeIntegrationDetail.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@
#pragma once

// Shared helpers + Woodcock-body templates for the per-sampler callables.
// Templates parameterised on state type and sample / sampleWithGradient
// closures; each sampler _ptx.cu passes inline lambdas so codegen stays
// monomorphic per variant.
// Templates are parameterised on the sampler state type only; they call the
// shared sampleValue / sampleNormal overloads, resolved by ADL on the concrete
// state type, so codegen stays monomorphic per variant.

#include "gpu/gpu_decl.h"
#include "gpu/gpu_math.h"
Expand Down Expand Up @@ -154,17 +154,13 @@ VISRTX_DEVICE bool applyShadowRussianRoulette(
return false;
}

// `sampleWithGradient` is a __device__ lambda capturing nothing —
// compiler inlines through it.
template <typename State, typename SampleWithGradientFn>
template <typename State>
VISRTX_DEVICE vec3 computeWorldNormal(const State &samplerState,
const SpatialFieldGPUData &field,
const vec3 &localPos,
const mat3x4 &worldToObject,
SampleWithGradientFn sampleWithGradient)
const mat3x4 &worldToObject)
{
vec3 localGradient(0.f);
sampleWithGradient(samplerState, field, localPos, localGradient);
const vec3 localGradient = sampleNormal(samplerState, field, localPos);
constexpr float MIN_GRADIENT_LENGTH_SQ = 1e-12f;
if (glm::dot(localGradient, localGradient) <= MIN_GRADIENT_LENGTH_SQ)
return vec3(0.f);
Expand All @@ -179,9 +175,9 @@ VISRTX_DEVICE vec3 computeWorldNormal(const State &samplerState,

// ---------------------------------------------------------------------------
// Woodcock-loop body templates. Each sampler family's per-variant callable
// passes its inline sample / sampleWithGradient closures (typically
// __device__ lambdas) and an already-inited state. The compiler inlines
// through the lambdas, so each variant's hot path is monomorphic.
// passes an already-inited state; the body calls the shared sampleValue /
// sampleNormal overloads for that state type, so each variant's hot path is
// monomorphic.

// Distance sampling via decomposition tracking (Kutz/Thiery/Novák/Iwasaki 2017):
// inside each macrocell, split σ_t = σ_c + σ_r where σ_c = per-cell min
Expand All @@ -190,17 +186,15 @@ VISRTX_DEVICE vec3 computeWorldNormal(const State &samplerState,
// Whichever fires first is the next event; both branches use the local σ_t at
// the event point for albedo/extinction. Degenerates to pure delta tracking
// when σ_c = 0 (sharp TF) and to closed-form analytic flight when σ_r = 0.
template <typename State, typename SampleFn, typename SampleWithGradientFn>
template <typename State>
VISRTX_DEVICE float woodcockSampleDistance(ScreenSample &ss,
const VolumeHit &hit,
State &samplerState,
const SpatialFieldGPUData &field,
vec3 &albedo,
float &extinction,
bool &didScatter,
vec3 *normal,
SampleFn sample,
SampleWithGradientFn sampleWithGradient)
vec3 *normal)
{
const auto &volume = *hit.volume;
auto &svv = volume.data.tf1d;
Expand Down Expand Up @@ -251,7 +245,7 @@ VISRTX_DEVICE float woodcockSampleDistance(ScreenSample &ss,
break;

const vec3 p = objRay.org + objRay.dir * t;
const float s = sample(samplerState, field, p);
const float s = sampleValue(samplerState, field, p);
if (glm::isnan(s))
continue;

Expand All @@ -276,7 +270,7 @@ VISRTX_DEVICE float woodcockSampleDistance(ScreenSample &ss,
// bound on σ_t).
if (!didScatter && t_control < trav.tExit) {
const vec3 p = objRay.org + objRay.dir * t_control;
const float s = sample(samplerState, field, p);
const float s = sampleValue(samplerState, field, p);
if (!glm::isnan(s)) {
const vec4 co = classifySample(volume, s);
const float σ_t_p =
Expand All @@ -295,11 +289,8 @@ VISRTX_DEVICE float woodcockSampleDistance(ScreenSample &ss,
}

if (normal && didScatter) {
*normal = computeWorldNormal(samplerState,
field,
scatterPos,
hit.instance->worldToObject,
sampleWithGradient);
*normal = computeWorldNormal(
samplerState, field, scatterPos, hit.instance->worldToObject);
}

return scatterT;
Expand All @@ -312,13 +303,12 @@ VISRTX_DEVICE float woodcockSampleDistance(ScreenSample &ss,
// σ_r_maj = σ_maj - σ_c, accumulating attenuation *= (1 - σ_r / σ_r_maj) per
// candidate. Tighter than plain ratio tracking when σ_c > 0; degenerates to
// plain ratio tracking when σ_c = 0.
template <typename State, typename SampleFn>
template <typename State>
VISRTX_DEVICE void woodcockRatioTrackTransmittance(ScreenSample &ss,
const VolumeHit &hit,
State &samplerState,
const SpatialFieldGPUData &field,
vec3 &attenuation,
SampleFn sample)
vec3 &attenuation)
{
const auto &volume = *hit.volume;
auto &svv = volume.data.tf1d;
Expand Down Expand Up @@ -363,7 +353,7 @@ VISRTX_DEVICE void woodcockRatioTrackTransmittance(ScreenSample &ss,
break;

const vec3 p = objRay.org + objRay.dir * t;
const float s = sample(samplerState, field, p);
const float s = sampleValue(samplerState, field, p);
if (glm::isnan(s))
continue;

Expand All @@ -390,17 +380,15 @@ VISRTX_DEVICE void woodcockRatioTrackTransmittance(ScreenSample &ss,
// this is deterministic emission-absorption ray marching: the macrocell
// `maxOpacity` is used only as an empty-cell flag, never as a majorant for
// null-collision sampling.
template <typename State, typename SampleFn, typename SampleWithGradientFn>
template <typename State>
VISRTX_DEVICE float latticeRayMarchVolume(ScreenSample &ss,
const VolumeHit &hit,
State &samplerState,
const SpatialFieldGPUData &field,
vec3 *color,
vec3 *normal,
float &opacity,
float invSamplingRate,
SampleFn sample,
SampleWithGradientFn sampleWithGradient)
float invSamplingRate)
{
const auto &volume = *hit.volume;
auto &svv = volume.data.tf1d;
Expand All @@ -421,7 +409,9 @@ VISRTX_DEVICE float latticeRayMarchVolume(ScreenSample &ss,
float depth = std::numeric_limits<float>::max();

constexpr float MIN_OPACITY_THRESHOLD = 1e-2f;
constexpr float MAX_OPACITY_THRESHOLD = 0.99f;
// Early-out once the segment is effectively opaque. Kept moderately high so
// the residual transmittance zeroed below is genuinely negligible.
constexpr float MAX_OPACITY_THRESHOLD = 0.999f;

// Single stratified jitter at the segment start.
const float jitter =
Expand Down Expand Up @@ -452,7 +442,7 @@ VISRTX_DEVICE float latticeRayMarchVolume(ScreenSample &ss,
break;
const vec3 p = objRay.org + objRay.dir * nextSampleT;

const float s = sample(samplerState, field, p);
const float s = sampleValue(samplerState, field, p);
if (!glm::isnan(s)) {
const vec4 co = classifySample(volume, s);

Expand All @@ -474,15 +464,20 @@ VISRTX_DEVICE float latticeRayMarchVolume(ScreenSample &ss,
trav.next();
}

// The early-out treats the segment as opaque, but front-to-back compositing
// leaves a small residual transmittance (1 - opacity). Against a low-dynamic-
// range background it's invisible; against an HDR background (bright sky, sun)
// even ~0.1% leaks visibly and makes the volume look more transparent than it
// should be.
if (opacity >= MAX_OPACITY_THRESHOLD)
opacity = 1.0f;

if (normal) {
*normal = vec3(0.f);
if (depth < std::numeric_limits<float>::max()) {
const vec3 p = objRay.org + objRay.dir * depth;
*normal = computeWorldNormal(samplerState,
field,
p,
hit.instance->worldToObject,
sampleWithGradient);
*normal = computeWorldNormal(
samplerState, field, p, hit.instance->worldToObject);
}
}

Expand Down
Loading
Loading