diff --git a/devices/rtx/device/CMakeLists.txt b/devices/rtx/device/CMakeLists.txt
index 53db3fb81..4724e4d55 100644
--- a/devices/rtx/device/CMakeLists.txt
+++ b/devices/rtx/device/CMakeLists.txt
@@ -196,7 +196,9 @@ set(SOURCES
   spatial_field/StructuredRectilinearSampler.cpp
   spatial_field/StructuredRegularSampler.cpp
   spatial_field/space_skipping/UniformGrid.cu
+  spatial_field/space_skipping/UniformGridCustom.cu
   spatial_field/CustomFieldSampler.cpp
+  spatial_field/CustomField.cpp
   spatial_field/RegisterCustomFields.cpp
 
   surface/Surface.cpp
diff --git a/devices/rtx/device/gpu/sbt.h b/devices/rtx/device/gpu/sbt.h
index 1f0c92645..40222ab68 100644
--- a/devices/rtx/device/gpu/sbt.h
+++ b/devices/rtx/device/gpu/sbt.h
@@ -52,15 +52,24 @@ enum class SurfaceShaderEntryPoints
 
 enum class SpatialFieldSamplerEntryPoints
 {
-  Init = 0,
-  Sample,
   // Heavy Woodcock-loop bodies live as per-variant direct-callables in each
-  // sampler PTX module. Renderer wrappers in volumeIntegration.h compute
-  // (samplerCallableIndex + offset) and invoke via optixDirectCall.
-  SampleDistance,
+  // sampler PTX module. Present for every field family. Renderer wrappers in
+  // volumeIntegration.h compute (samplerCallableIndex + offset) and invoke via
+  // optixDirectCall.
+  SampleDistance = 0,
   RatioTrackTransmittance,
   RayMarchVolume,
-  Count
+  Count, // built-in families reserve this many callable slots
+
+  // Custom-field-only hooks. Built-in families sample inline — their concrete
+  // state type resolves sampleValue/sampleNormal by ADL inside the Woodcock
+  // body — so they never dispatch these through the SBT. Custom fields route
+  // back through the SBT (callable-in-callable) because the user sampler is
+  // compiled into these callables.
+  Init = Count,
+  SampleValue,
+  SampleNormal,
+  CustomCount // custom field reserves this many callable slots
 };
 
 enum class SbtCallableEntryPoints : uint32_t
@@ -96,7 +105,8 @@ enum class SbtCallableEntryPoints : uint32_t
   // based on CustomFieldType in the field data
   SpatialFieldSamplerCustom = SpatialFieldSamplerNvdbRectilinearFloat
       + int(SpatialFieldSamplerEntryPoints::Count),
-  Last = SpatialFieldSamplerCustom + int(SpatialFieldSamplerEntryPoints::Count),
+  Last = SpatialFieldSamplerCustom
+      + int(SpatialFieldSamplerEntryPoints::CustomCount),
 };
 
 } // namespace visrtx
diff --git a/devices/rtx/device/gpu/volumeIntegrationDetail.h b/devices/rtx/device/gpu/volumeIntegrationDetail.h
index b1f6f980b..4b7655d1c 100644
--- a/devices/rtx/device/gpu/volumeIntegrationDetail.h
+++ b/devices/rtx/device/gpu/volumeIntegrationDetail.h
@@ -32,9 +32,9 @@
 #pragma once
 
 // Shared helpers + Woodcock-body templates for the per-sampler callables.
-// Templates parameterised on state type and sample / sampleWithGradient
-// closures; each sampler _ptx.cu passes inline lambdas so codegen stays
-// monomorphic per variant.
+// Templates are parameterised on the sampler state type only; they call the
+// shared sampleValue / sampleNormal overloads, resolved by ADL on the concrete
+// state type, so codegen stays monomorphic per variant.
 
 #include "gpu/gpu_decl.h"
 #include "gpu/gpu_math.h"
@@ -154,17 +154,13 @@ VISRTX_DEVICE bool applyShadowRussianRoulette(
   return false;
 }
 
-// `sampleWithGradient` is a __device__ lambda capturing nothing —
-// compiler inlines through it.
-template <typename State, typename SampleWithGradientFn>
+template <typename State>
 VISRTX_DEVICE  vec3 computeWorldNormal(const State &samplerState,
     const SpatialFieldGPUData &field,
     const vec3 &localPos,
-    const mat3x4 &worldToObject,
-    SampleWithGradientFn sampleWithGradient)
+    const mat3x4 &worldToObject)
 {
-  vec3 localGradient(0.f);
-  sampleWithGradient(samplerState, field, localPos, localGradient);
+  const vec3 localGradient = sampleNormal(samplerState, field, localPos);
   constexpr float MIN_GRADIENT_LENGTH_SQ = 1e-12f;
   if (glm::dot(localGradient, localGradient) <= MIN_GRADIENT_LENGTH_SQ)
     return vec3(0.f);
@@ -179,9 +175,9 @@ VISRTX_DEVICE  vec3 computeWorldNormal(const State &samplerState,
 
 // ---------------------------------------------------------------------------
 // Woodcock-loop body templates. Each sampler family's per-variant callable
-// passes its inline sample / sampleWithGradient closures (typically
-// __device__ lambdas) and an already-inited state. The compiler inlines
-// through the lambdas, so each variant's hot path is monomorphic.
+// passes an already-inited state; the body calls the shared sampleValue /
+// sampleNormal overloads for that state type, so each variant's hot path is
+// monomorphic.
 
 // Distance sampling via decomposition tracking (Kutz/Thiery/Novák/Iwasaki 2017):
 // inside each macrocell, split σ_t = σ_c + σ_r where σ_c = per-cell min
@@ -190,7 +186,7 @@ VISRTX_DEVICE  vec3 computeWorldNormal(const State &samplerState,
 // Whichever fires first is the next event; both branches use the local σ_t at
 // the event point for albedo/extinction. Degenerates to pure delta tracking
 // when σ_c = 0 (sharp TF) and to closed-form analytic flight when σ_r = 0.
-template <typename State, typename SampleFn, typename SampleWithGradientFn>
+template <typename State>
 VISRTX_DEVICE  float woodcockSampleDistance(ScreenSample &ss,
     const VolumeHit &hit,
     State &samplerState,
@@ -198,9 +194,7 @@ VISRTX_DEVICE  float woodcockSampleDistance(ScreenSample &ss,
     vec3 &albedo,
     float &extinction,
     bool &didScatter,
-    vec3 *normal,
-    SampleFn sample,
-    SampleWithGradientFn sampleWithGradient)
+    vec3 *normal)
 {
   const auto &volume = *hit.volume;
   auto &svv = volume.data.tf1d;
@@ -251,7 +245,7 @@ VISRTX_DEVICE  float woodcockSampleDistance(ScreenSample &ss,
           break;
 
         const vec3 p = objRay.org + objRay.dir * t;
-        const float s = sample(samplerState, field, p);
+        const float s = sampleValue(samplerState, field, p);
         if (glm::isnan(s))
           continue;
 
@@ -276,7 +270,7 @@ VISRTX_DEVICE  float woodcockSampleDistance(ScreenSample &ss,
     // bound on σ_t).
     if (!didScatter && t_control < trav.tExit) {
       const vec3 p = objRay.org + objRay.dir * t_control;
-      const float s = sample(samplerState, field, p);
+      const float s = sampleValue(samplerState, field, p);
       if (!glm::isnan(s)) {
         const vec4 co = classifySample(volume, s);
         const float σ_t_p =
@@ -295,11 +289,8 @@ VISRTX_DEVICE  float woodcockSampleDistance(ScreenSample &ss,
   }
 
   if (normal && didScatter) {
-    *normal = computeWorldNormal(samplerState,
-        field,
-        scatterPos,
-        hit.instance->worldToObject,
-        sampleWithGradient);
+    *normal = computeWorldNormal(
+        samplerState, field, scatterPos, hit.instance->worldToObject);
   }
 
   return scatterT;
@@ -312,13 +303,12 @@ VISRTX_DEVICE  float woodcockSampleDistance(ScreenSample &ss,
 // σ_r_maj = σ_maj - σ_c, accumulating attenuation *= (1 - σ_r / σ_r_maj) per
 // candidate. Tighter than plain ratio tracking when σ_c > 0; degenerates to
 // plain ratio tracking when σ_c = 0.
-template <typename State, typename SampleFn>
+template <typename State>
 VISRTX_DEVICE  void woodcockRatioTrackTransmittance(ScreenSample &ss,
     const VolumeHit &hit,
     State &samplerState,
     const SpatialFieldGPUData &field,
-    vec3 &attenuation,
-    SampleFn sample)
+    vec3 &attenuation)
 {
   const auto &volume = *hit.volume;
   auto &svv = volume.data.tf1d;
@@ -363,7 +353,7 @@ VISRTX_DEVICE  void woodcockRatioTrackTransmittance(ScreenSample &ss,
           break;
 
         const vec3 p = objRay.org + objRay.dir * t;
-        const float s = sample(samplerState, field, p);
+        const float s = sampleValue(samplerState, field, p);
         if (glm::isnan(s))
           continue;
 
@@ -390,7 +380,7 @@ VISRTX_DEVICE  void woodcockRatioTrackTransmittance(ScreenSample &ss,
 // this is deterministic emission-absorption ray marching: the macrocell
 // `maxOpacity` is used only as an empty-cell flag, never as a majorant for
 // null-collision sampling.
-template <typename State, typename SampleFn, typename SampleWithGradientFn>
+template <typename State>
 VISRTX_DEVICE  float latticeRayMarchVolume(ScreenSample &ss,
     const VolumeHit &hit,
     State &samplerState,
@@ -398,9 +388,7 @@ VISRTX_DEVICE  float latticeRayMarchVolume(ScreenSample &ss,
     vec3 *color,
     vec3 *normal,
     float &opacity,
-    float invSamplingRate,
-    SampleFn sample,
-    SampleWithGradientFn sampleWithGradient)
+    float invSamplingRate)
 {
   const auto &volume = *hit.volume;
   auto &svv = volume.data.tf1d;
@@ -421,7 +409,9 @@ VISRTX_DEVICE  float latticeRayMarchVolume(ScreenSample &ss,
   float depth = std::numeric_limits<float>::max();
 
   constexpr float MIN_OPACITY_THRESHOLD = 1e-2f;
-  constexpr float MAX_OPACITY_THRESHOLD = 0.99f;
+  // Early-out once the segment is effectively opaque. Kept moderately high so
+  // the residual transmittance zeroed below is genuinely negligible.
+  constexpr float MAX_OPACITY_THRESHOLD = 0.999f;
 
   // Single stratified jitter at the segment start.
   const float jitter =
@@ -452,7 +442,7 @@ VISRTX_DEVICE  float latticeRayMarchVolume(ScreenSample &ss,
         break;
       const vec3 p = objRay.org + objRay.dir * nextSampleT;
 
-      const float s = sample(samplerState, field, p);
+      const float s = sampleValue(samplerState, field, p);
       if (!glm::isnan(s)) {
         const vec4 co = classifySample(volume, s);
 
@@ -474,15 +464,20 @@ VISRTX_DEVICE  float latticeRayMarchVolume(ScreenSample &ss,
     trav.next();
   }
 
+  // The early-out treats the segment as opaque, but front-to-back compositing
+  // leaves a small residual transmittance (1 - opacity). Against a low-dynamic-
+  // range background it's invisible; against an HDR background (bright sky, sun)
+  // even ~0.1% leaks visibly and makes the volume look more transparent than it
+  // should be.
+  if (opacity >= MAX_OPACITY_THRESHOLD)
+    opacity = 1.0f;
+
   if (normal) {
     *normal = vec3(0.f);
     if (depth < std::numeric_limits<float>::max()) {
       const vec3 p = objRay.org + objRay.dir * depth;
-      *normal = computeWorldNormal(samplerState,
-          field,
-          p,
-          hit.instance->worldToObject,
-          sampleWithGradient);
+      *normal = computeWorldNormal(
+          samplerState, field, p, hit.instance->worldToObject);
     }
   }
 
diff --git a/devices/rtx/device/renderer/Renderer.cpp b/devices/rtx/device/renderer/Renderer.cpp
index 23b160d44..6dd2778dd 100644
--- a/devices/rtx/device/renderer/Renderer.cpp
+++ b/devices/rtx/device/renderer/Renderer.cpp
@@ -591,14 +591,6 @@ void Renderer::initOptixPipeline()
     constexpr auto SBT_CALLABLE_SPATIAL_FIELD_REGULAR_OFFSET =
         int(SbtCallableEntryPoints::SpatialFieldSamplerRegular);
     samplerDesc.callables.moduleDC = state.fieldSamplers.structuredRegular;
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__initStructuredRegularSampler";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_REGULAR_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc;
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__sampleStructuredRegular";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_REGULAR_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc;
     samplerDesc.callables.entryFunctionNameDC =
         "__direct_callable__sampleDistanceStructuredRegular";
     callableDescs[SBT_CALLABLE_SPATIAL_FIELD_REGULAR_OFFSET
@@ -628,14 +620,6 @@ void Renderer::initOptixPipeline()
         int(SbtCallableEntryPoints::SpatialFieldSamplerNvdbFloat);
 
     // Fp4
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__initNvdbSamplerFp4";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP4_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc;
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__sampleNvdbFp4";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP4_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc;
     samplerDesc.callables.entryFunctionNameDC =
         "__direct_callable__sampleDistanceNvdbFp4";
     callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP4_OFFSET
@@ -651,14 +635,6 @@ void Renderer::initOptixPipeline()
         + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc;
 
     // Fp8
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__initNvdbSamplerFp8";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP8_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc;
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__sampleNvdbFp8";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP8_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc;
     samplerDesc.callables.entryFunctionNameDC =
         "__direct_callable__sampleDistanceNvdbFp8";
     callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP8_OFFSET
@@ -674,14 +650,6 @@ void Renderer::initOptixPipeline()
         + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc;
 
     // Fp16
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__initNvdbSamplerFp16";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP16_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc;
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__sampleNvdbFp16";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP16_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc;
     samplerDesc.callables.entryFunctionNameDC =
         "__direct_callable__sampleDistanceNvdbFp16";
     callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP16_OFFSET
@@ -697,14 +665,6 @@ void Renderer::initOptixPipeline()
         + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc;
 
     // FpN
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__initNvdbSamplerFpN";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FPN_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc;
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__sampleNvdbFpN";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FPN_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc;
     samplerDesc.callables.entryFunctionNameDC =
         "__direct_callable__sampleDistanceNvdbFpN";
     callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FPN_OFFSET
@@ -720,14 +680,6 @@ void Renderer::initOptixPipeline()
         + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc;
 
     // Float
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__initNvdbSamplerFloat";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FLOAT_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc;
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__sampleNvdbFloat";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FLOAT_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc;
     samplerDesc.callables.entryFunctionNameDC =
         "__direct_callable__sampleDistanceNvdbFloat";
     callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FLOAT_OFFSET
@@ -747,14 +699,6 @@ void Renderer::initOptixPipeline()
     constexpr auto SBT_CALLABLE_SPATIAL_FIELD_RECTILINEAR_OFFSET =
         int(SbtCallableEntryPoints::SpatialFieldSamplerRectilinear);
 
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__initStructuredRectilinearSampler";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_RECTILINEAR_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc;
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__sampleStructuredRectilinear";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_RECTILINEAR_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc;
     samplerDesc.callables.entryFunctionNameDC =
         "__direct_callable__sampleDistanceStructuredRectilinear";
     callableDescs[SBT_CALLABLE_SPATIAL_FIELD_RECTILINEAR_OFFSET
@@ -784,14 +728,6 @@ void Renderer::initOptixPipeline()
         int(SbtCallableEntryPoints::SpatialFieldSamplerNvdbRectilinearFloat);
 
     // Fp4
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__initNvdbRectilinearSamplerFp4";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP4_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc;
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__sampleNvdbRectilinearFp4";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP4_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc;
     samplerDesc.callables.entryFunctionNameDC =
         "__direct_callable__sampleDistanceNvdbRectilinearFp4";
     callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP4_OFFSET
@@ -807,14 +743,6 @@ void Renderer::initOptixPipeline()
         + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc;
 
     // Fp8
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__initNvdbRectilinearSamplerFp8";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP8_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc;
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__sampleNvdbRectilinearFp8";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP8_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc;
     samplerDesc.callables.entryFunctionNameDC =
         "__direct_callable__sampleDistanceNvdbRectilinearFp8";
     callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP8_OFFSET
@@ -830,14 +758,6 @@ void Renderer::initOptixPipeline()
         + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc;
 
     // Fp16
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__initNvdbRectilinearSamplerFp16";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP16_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc;
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__sampleNvdbRectilinearFp16";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP16_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc;
     samplerDesc.callables.entryFunctionNameDC =
         "__direct_callable__sampleDistanceNvdbRectilinearFp16";
     callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP16_OFFSET
@@ -853,14 +773,6 @@ void Renderer::initOptixPipeline()
         + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc;
 
     // FpN
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__initNvdbRectilinearSamplerFpN";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FPN_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc;
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__sampleNvdbRectilinearFpN";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FPN_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc;
     samplerDesc.callables.entryFunctionNameDC =
         "__direct_callable__sampleDistanceNvdbRectilinearFpN";
     callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FPN_OFFSET
@@ -876,14 +788,6 @@ void Renderer::initOptixPipeline()
         + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc;
 
     // Float
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__initNvdbRectilinearSamplerFloat";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FLOAT_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc;
-    samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__sampleNvdbRectilinearFloat";
-    callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FLOAT_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc;
     samplerDesc.callables.entryFunctionNameDC =
         "__direct_callable__sampleDistanceNvdbRectilinearFloat";
     callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FLOAT_OFFSET
@@ -910,9 +814,13 @@ void Renderer::initOptixPipeline()
     callableDescs[SBT_CALLABLE_CUSTOM_OFFSET
         + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc;
     samplerDesc.callables.entryFunctionNameDC =
-        "__direct_callable__sampleCustom";
+        "__direct_callable__sampleValueCustom";
+    callableDescs[SBT_CALLABLE_CUSTOM_OFFSET
+        + int(SpatialFieldSamplerEntryPoints::SampleValue)] = samplerDesc;
+    samplerDesc.callables.entryFunctionNameDC =
+        "__direct_callable__sampleNormalCustom";
     callableDescs[SBT_CALLABLE_CUSTOM_OFFSET
-        + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc;
+        + int(SpatialFieldSamplerEntryPoints::SampleNormal)] = samplerDesc;
     samplerDesc.callables.entryFunctionNameDC =
         "__direct_callable__sampleDistanceCustom";
     callableDescs[SBT_CALLABLE_CUSTOM_OFFSET
diff --git a/devices/rtx/device/spatial_field/CustomField.cpp b/devices/rtx/device/spatial_field/CustomField.cpp
new file mode 100644
index 000000000..60958ceb0
--- /dev/null
+++ b/devices/rtx/device/spatial_field/CustomField.cpp
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include "CustomField.h"
+
+namespace visrtx {
+
+void CustomField::finalize()
+{
+  // gpuData() virtual-dispatches to the concrete custom field, which has
+  // already uploaded its device data and called m_uniformGrid.init(...).
+  m_uniformGrid.computeValueRanges(gpuData());
+
+  // Upload gpuData() into the device registry. Must run after the grid is
+  // populated so the snapshot includes the macrocell pointers. Built-in fields
+  // do the same at the end of their finalize(); omitting it leaves
+  // registry.fields[index] uninitialized → garbage samplerCallableIndex/grid
+  // on the GPU → illegal access when the volume is sampled.
+  upload();
+}
+
+} // namespace visrtx
diff --git a/devices/rtx/device/spatial_field/CustomField.h b/devices/rtx/device/spatial_field/CustomField.h
index 0724cb5ae..12d091afa 100644
--- a/devices/rtx/device/spatial_field/CustomField.h
+++ b/devices/rtx/device/spatial_field/CustomField.h
@@ -25,6 +25,10 @@ struct CustomField : public SpatialField
 {
   CustomField(DeviceGlobalState *d) : SpatialField(d) {}
   ~CustomField() override = default;
+
+  // Builds the majorant grid from the (already-uploaded) field data. Derived
+  // fields must init the grid and upload GPU data before calling this.
+  void finalize() override;
 };
 
 } // namespace visrtx
diff --git a/devices/rtx/device/spatial_field/CustomFieldSampler_ptx.cu b/devices/rtx/device/spatial_field/CustomFieldSampler_ptx.cu
index 8cd9a3457..5409f34cb 100644
--- a/devices/rtx/device/spatial_field/CustomFieldSampler_ptx.cu
+++ b/devices/rtx/device/spatial_field/CustomFieldSampler_ptx.cu
@@ -13,7 +13,7 @@
  * 1. Define the field data struct and add to CustomFieldType enum
  * 2. Create a sampler header with sampleXxx() function
  * 3. Include the header below
- * 4. Add a case to the switch in __direct_callable__sampleCustom
+ * 4. Add a case to the dispatch used by sampleCustomImpl
  */
 
 #include "gpu/gpu_decl.h"
@@ -50,39 +50,98 @@ VISRTX_CALLABLE void __direct_callable__initCustomSampler(
   samplerState->custom = field->data.custom;
 }
 
-/**
- * @brief Sample the custom field at a given location
- *
- * Dispatches to the appropriate sampling function based on subType.
- * Returns a normalized field value in [0, 1].
- *
- * If no custom samplers are configured (VISRTX_CUSTOM_SAMPLE_DISPATCH
- * not defined), returns 0.0 as a fallback.
- */
-VISRTX_CALLABLE float __direct_callable__sampleCustom(
-    const VolumeSamplingState *samplerState,
-    const vec3 *location,
-    vec3 *gradient)
+// User value sampler. Returns 0 when no custom samplers are configured. The
+// user API exposes only a scalar sampler (VISRTX_CUSTOM_SAMPLE_DISPATCH); the
+// normal is derived from it by central differences below.
+namespace visrtx {
+VISRTX_DEVICE float sampleCustomImpl(const CustomFieldData &data, const vec3 &P)
 {
 #ifdef VISRTX_CUSTOM_SAMPLE_DISPATCH
-  const CustomFieldData &data = samplerState->custom;
-  const vec3 P = *location;
-
   VISRTX_CUSTOM_SAMPLE_DISPATCH(data, P)
 #else
   return 0.0f;
 #endif
 }
+} // namespace visrtx
+
+VISRTX_CALLABLE float __direct_callable__sampleValueCustom(
+    const VolumeSamplingState *samplerState,
+    const SpatialFieldGPUData *,
+    const vec3 *location)
+{
+  return sampleCustomImpl(samplerState->custom, *location);
+}
+
+// Central-difference gradient of the user value sampler. Returns the
+// unnormalized object-space gradient, matching the built-in sampleNormal
+// convention (caller orients + normalizes, see computeWorldNormal). Step is a
+// small fraction of the field's domain so it is scale-invariant.
+VISRTX_CALLABLE vec3 __direct_callable__sampleNormalCustom(
+    const VolumeSamplingState *samplerState,
+    const SpatialFieldGPUData *field,
+    const vec3 *location)
+{
+  const CustomFieldData &d = samplerState->custom;
+  const vec3 p = *location;
+  const vec3 extent = field->roi.upper - field->roi.lower;
+  const float eps = fmaxf(1e-3f * glm::length(extent), 1e-6f);
+  const float gx = sampleCustomImpl(d, p + vec3(eps, 0.f, 0.f))
+      - sampleCustomImpl(d, p - vec3(eps, 0.f, 0.f));
+  const float gy = sampleCustomImpl(d, p + vec3(0.f, eps, 0.f))
+      - sampleCustomImpl(d, p - vec3(0.f, eps, 0.f));
+  const float gz = sampleCustomImpl(d, p + vec3(0.f, 0.f, eps))
+      - sampleCustomImpl(d, p - vec3(0.f, 0.f, eps));
+  return vec3(gx, gy, gz) * (1.f / (2.f * eps));
+}
 
 //=============================================================================
 // Woodcock-body callables for custom fields. Because the inner sample()
-// implementation is user-supplied (compiled into __direct_callable__sampleCustom
-// in this same module), the Woodcock body dispatches via optixDirectCall to the
-// same module's Sample slot — callable-in-callable. Slower than the built-in
-// families' inline path but preserves the user-facing UX (custom-field authors
-// implement only init + sample).
+// implementation is user-supplied (compiled into the value/normal callables in
+// this same module), the Woodcock body dispatches via optixDirectCall to the
+// same module's SampleValue/SampleNormal slots — callable-in-callable. Slower
+// than the built-in families' inline path but preserves the user-facing UX
+// (custom-field authors implement only init + sample).
 //=============================================================================
 
+// Shared per-sample API (see gpu/volumeIntegrationDetail.h) for custom fields.
+// Unlike the built-in inline samplers, custom sampling routes back through the
+// user's init/value/normal direct-callables, so it needs `field` for the
+// callable index.
+namespace visrtx {
+
+VISRTX_DEVICE void initSamplerState(
+    VolumeSamplingState &s, const SpatialFieldGPUData &field)
+{
+  optixDirectCall<void>(uint32_t(field.samplerCallableIndex)
+          + uint32_t(SpatialFieldSamplerEntryPoints::Init),
+      &s,
+      &field);
+}
+
+VISRTX_DEVICE float sampleValue(const VolumeSamplingState &s,
+    const SpatialFieldGPUData &field,
+    const vec3 &p)
+{
+  return optixDirectCall<float>(uint32_t(field.samplerCallableIndex)
+          + uint32_t(SpatialFieldSamplerEntryPoints::SampleValue),
+      &s,
+      &field,
+      &p);
+}
+
+VISRTX_DEVICE vec3 sampleNormal(const VolumeSamplingState &s,
+    const SpatialFieldGPUData &field,
+    const vec3 &p)
+{
+  return optixDirectCall<vec3>(uint32_t(field.samplerCallableIndex)
+          + uint32_t(SpatialFieldSamplerEntryPoints::SampleNormal),
+      &s,
+      &field,
+      &p);
+}
+
+} // namespace visrtx
+
 VISRTX_CALLABLE float __direct_callable__sampleDistanceCustom(ScreenSample *ss,
     const VolumeHit *hit,
     vec3 *albedo,
@@ -93,9 +152,8 @@ VISRTX_CALLABLE float __direct_callable__sampleDistanceCustom(ScreenSample *ss,
   const auto &field =
       getSpatialFieldData(*ss->frameData, hit->volume->data.tf1d.field);
   VolumeSamplingState samplerState;
-  samplerState.custom = field.data.custom;
+  initSamplerState(samplerState, field);
 
-  const uint32_t baseIdx = uint32_t(field.samplerCallableIndex);
   return detail::woodcockSampleDistance(*ss,
       *hit,
       samplerState,
@@ -103,26 +161,7 @@ VISRTX_CALLABLE float __direct_callable__sampleDistanceCustom(ScreenSample *ss,
       *albedo,
       *extinction,
       *didScatter,
-      normal,
-      [baseIdx] __device__(const VolumeSamplingState &s,
-          const SpatialFieldGPUData &,
-          const vec3 &p) {
-        return optixDirectCall<float>(
-            baseIdx + uint32_t(SpatialFieldSamplerEntryPoints::Sample),
-            &s,
-            &p,
-            (vec3 *)nullptr);
-      },
-      [baseIdx] __device__(const VolumeSamplingState &s,
-          const SpatialFieldGPUData &,
-          const vec3 &p,
-          vec3 &g) {
-        return optixDirectCall<float>(
-            baseIdx + uint32_t(SpatialFieldSamplerEntryPoints::Sample),
-            &s,
-            &p,
-            &g);
-      });
+      normal);
 }
 
 VISRTX_CALLABLE void __direct_callable__ratioTrackTransmittanceCustom(
@@ -131,23 +170,10 @@ VISRTX_CALLABLE void __direct_callable__ratioTrackTransmittanceCustom(
   const auto &field =
       getSpatialFieldData(*ss->frameData, hit->volume->data.tf1d.field);
   VolumeSamplingState samplerState;
-  samplerState.custom = field.data.custom;
+  initSamplerState(samplerState, field);
 
-  const uint32_t baseIdx = uint32_t(field.samplerCallableIndex);
-  detail::woodcockRatioTrackTransmittance(*ss,
-      *hit,
-      samplerState,
-      field,
-      *attenuation,
-      [baseIdx] __device__(const VolumeSamplingState &s,
-          const SpatialFieldGPUData &,
-          const vec3 &p) {
-        return optixDirectCall<float>(
-            baseIdx + uint32_t(SpatialFieldSamplerEntryPoints::Sample),
-            &s,
-            &p,
-            (vec3 *)nullptr);
-      });
+  detail::woodcockRatioTrackTransmittance(
+      *ss, *hit, samplerState, field, *attenuation);
 }
 
 VISRTX_CALLABLE float __direct_callable__rayMarchVolumeCustom(ScreenSample *ss,
@@ -160,34 +186,8 @@ VISRTX_CALLABLE float __direct_callable__rayMarchVolumeCustom(ScreenSample *ss,
   const auto &field =
       getSpatialFieldData(*ss->frameData, hit->volume->data.tf1d.field);
   VolumeSamplingState samplerState;
-  samplerState.custom = field.data.custom;
+  initSamplerState(samplerState, field);
 
-  const uint32_t baseIdx = uint32_t(field.samplerCallableIndex);
-  return detail::latticeRayMarchVolume(*ss,
-      *hit,
-      samplerState,
-      field,
-      color,
-      normal,
-      *opacity,
-      invSamplingRate,
-      [baseIdx] __device__(const VolumeSamplingState &s,
-          const SpatialFieldGPUData &,
-          const vec3 &p) {
-        return optixDirectCall<float>(
-            baseIdx + uint32_t(SpatialFieldSamplerEntryPoints::Sample),
-            &s,
-            &p,
-            (vec3 *)nullptr);
-      },
-      [baseIdx] __device__(const VolumeSamplingState &s,
-          const SpatialFieldGPUData &,
-          const vec3 &p,
-          vec3 &g) {
-        return optixDirectCall<float>(
-            baseIdx + uint32_t(SpatialFieldSamplerEntryPoints::Sample),
-            &s,
-            &p,
-            &g);
-      });
+  return detail::latticeRayMarchVolume(
+      *ss, *hit, samplerState, field, color, normal, *opacity, invSamplingRate);
 }
diff --git a/devices/rtx/device/spatial_field/NvdbRectilinearSamplerInline.h b/devices/rtx/device/spatial_field/NvdbRectilinearSamplerInline.h
index e267b4a01..356104eb7 100644
--- a/devices/rtx/device/spatial_field/NvdbRectilinearSamplerInline.h
+++ b/devices/rtx/device/spatial_field/NvdbRectilinearSamplerInline.h
@@ -120,37 +120,43 @@ VISRTX_DEVICE float sampleAtIndexRectilinear(
   return state.linearSampler(clamped);
 }
 
+// Shared per-sample API (see gpu/volumeIntegrationDetail.h). sampleValue
+// returns the field value; sampleNormal returns the unnormalized object-space
+// gradient (the raw normal direction) — callers orient and normalize. `field`
+// is unused for built-in fields (present for a uniform overload set).
 template <typename ValueType>
-VISRTX_DEVICE float sampleNvdbRectilinear(
+VISRTX_DEVICE float sampleValue(
     const NvdbRectilinearSamplerState<ValueType> &state,
-    const vec3 *location,
-    vec3 *gradient)
+    const SpatialFieldGPUData &,
+    const vec3 &p)
 {
-  const auto indexPos = worldToIndexRectilinear(state, location);
-  const float value = sampleAtIndexRectilinear(state, indexPos);
-
-  if (gradient) {
-    const float sxp =
-        sampleAtIndexRectilinear(state, indexPos + nanovdb::Vec3f(1, 0, 0));
-    const float sxn =
-        sampleAtIndexRectilinear(state, indexPos - nanovdb::Vec3f(1, 0, 0));
-    const float syp =
-        sampleAtIndexRectilinear(state, indexPos + nanovdb::Vec3f(0, 1, 0));
-    const float syn =
-        sampleAtIndexRectilinear(state, indexPos - nanovdb::Vec3f(0, 1, 0));
-    const float szp =
-        sampleAtIndexRectilinear(state, indexPos + nanovdb::Vec3f(0, 0, 1));
-    const float szn =
-        sampleAtIndexRectilinear(state, indexPos - nanovdb::Vec3f(0, 0, 1));
-
-    *gradient = vec3(sxp - sxn, syp - syn, szp - szn)
-        * vec3(state.invAvgVoxelSize[0],
-            state.invAvgVoxelSize[1],
-            state.invAvgVoxelSize[2])
-        * 0.5f;
-  }
+  return sampleAtIndexRectilinear(state, worldToIndexRectilinear(state, &p));
+}
 
-  return value;
+template <typename ValueType>
+VISRTX_DEVICE vec3 sampleNormal(
+    const NvdbRectilinearSamplerState<ValueType> &state,
+    const SpatialFieldGPUData &,
+    const vec3 &p)
+{
+  const auto indexPos = worldToIndexRectilinear(state, &p);
+  const float sxp =
+      sampleAtIndexRectilinear(state, indexPos + nanovdb::Vec3f(1, 0, 0));
+  const float sxn =
+      sampleAtIndexRectilinear(state, indexPos - nanovdb::Vec3f(1, 0, 0));
+  const float syp =
+      sampleAtIndexRectilinear(state, indexPos + nanovdb::Vec3f(0, 1, 0));
+  const float syn =
+      sampleAtIndexRectilinear(state, indexPos - nanovdb::Vec3f(0, 1, 0));
+  const float szp =
+      sampleAtIndexRectilinear(state, indexPos + nanovdb::Vec3f(0, 0, 1));
+  const float szn =
+      sampleAtIndexRectilinear(state, indexPos - nanovdb::Vec3f(0, 0, 1));
+  return vec3(sxp - sxn, syp - syn, szp - szn)
+      * vec3(state.invAvgVoxelSize[0],
+          state.invAvgVoxelSize[1],
+          state.invAvgVoxelSize[2])
+      * 0.5f;
 }
 
 } // namespace visrtx
diff --git a/devices/rtx/device/spatial_field/NvdbRectilinearSampler_ptx.cu b/devices/rtx/device/spatial_field/NvdbRectilinearSampler_ptx.cu
index fc3f52fe6..b29b92059 100644
--- a/devices/rtx/device/spatial_field/NvdbRectilinearSampler_ptx.cu
+++ b/devices/rtx/device/spatial_field/NvdbRectilinearSampler_ptx.cu
@@ -30,93 +30,15 @@
  */
 
 // OptiX direct-callable entry points for the NanoVDB rectilinear-grid sampler.
-// Implementations live in NvdbRectilinearSamplerInline.h.
+// Only the Woodcock-body callables are exposed via the SBT; value/normal/init
+// sampling stays inline (NvdbRectilinearSamplerInline.h) since the bodies below
+// resolve sampleValue/sampleNormal by ADL on the concrete state type.
 
 #include "NvdbRectilinearSamplerInline.h"
 #include "gpu/volumeIntegrationDetail.h"
 
 using namespace visrtx;
 
-// Fp4 rectilinear sampler
-VISRTX_CALLABLE void __direct_callable__initNvdbRectilinearSamplerFp4(
-    VolumeSamplingState *samplerState, const SpatialFieldGPUData *field)
-{
-  initNvdbRectilinearSampler(samplerState->nvdbRectilinearFp4, field);
-}
-
-VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFp4(
-    const VolumeSamplingState *samplerState,
-    const vec3 *location,
-    vec3 *gradient)
-{
-  return sampleNvdbRectilinear(
-      samplerState->nvdbRectilinearFp4, location, gradient);
-}
-
-// Fp8 rectilinear sampler
-VISRTX_CALLABLE void __direct_callable__initNvdbRectilinearSamplerFp8(
-    VolumeSamplingState *samplerState, const SpatialFieldGPUData *field)
-{
-  initNvdbRectilinearSampler(samplerState->nvdbRectilinearFp8, field);
-}
-
-VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFp8(
-    const VolumeSamplingState *samplerState,
-    const vec3 *location,
-    vec3 *gradient)
-{
-  return sampleNvdbRectilinear(
-      samplerState->nvdbRectilinearFp8, location, gradient);
-}
-
-// Fp16 rectilinear sampler
-VISRTX_CALLABLE void __direct_callable__initNvdbRectilinearSamplerFp16(
-    VolumeSamplingState *samplerState, const SpatialFieldGPUData *field)
-{
-  initNvdbRectilinearSampler(samplerState->nvdbRectilinearFp16, field);
-}
-
-VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFp16(
-    const VolumeSamplingState *samplerState,
-    const vec3 *location,
-    vec3 *gradient)
-{
-  return sampleNvdbRectilinear(
-      samplerState->nvdbRectilinearFp16, location, gradient);
-}
-
-// FpN rectilinear sampler
-VISRTX_CALLABLE void __direct_callable__initNvdbRectilinearSamplerFpN(
-    VolumeSamplingState *samplerState, const SpatialFieldGPUData *field)
-{
-  initNvdbRectilinearSampler(samplerState->nvdbRectilinearFpN, field);
-}
-
-VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFpN(
-    const VolumeSamplingState *samplerState,
-    const vec3 *location,
-    vec3 *gradient)
-{
-  return sampleNvdbRectilinear(
-      samplerState->nvdbRectilinearFpN, location, gradient);
-}
-
-// Float rectilinear sampler
-VISRTX_CALLABLE void __direct_callable__initNvdbRectilinearSamplerFloat(
-    VolumeSamplingState *samplerState, const SpatialFieldGPUData *field)
-{
-  initNvdbRectilinearSampler(samplerState->nvdbRectilinearFloat, field);
-}
-
-VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFloat(
-    const VolumeSamplingState *samplerState,
-    const vec3 *location,
-    vec3 *gradient)
-{
-  return sampleNvdbRectilinear(
-      samplerState->nvdbRectilinearFloat, location, gradient);
-}
-
 // Woodcock-body callables — see NvdbRegularSampler_ptx.cu for the design rationale.
 #define VISRTX_DEFINE_NVDB_RECT_WOODCOCK_CALLABLES(Suffix, ValueType)         \
   VISRTX_CALLABLE float __direct_callable__sampleDistance##Suffix(            \
@@ -139,14 +61,7 @@ VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFloat(
         *albedo,                                                              \
         *extinction,                                                          \
         *didScatter,                                                          \
-        normal,                                                               \
-        [] __device__(const NvdbRectilinearSamplerState<ValueType> &s,        \
-            const SpatialFieldGPUData &,                                      \
-            const vec3 &p) { return sampleNvdbRectilinear(s, &p, nullptr); }, \
-        [] __device__(const NvdbRectilinearSamplerState<ValueType> &s,        \
-            const SpatialFieldGPUData &,                                      \
-            const vec3 &p,                                                    \
-            vec3 &g) { return sampleNvdbRectilinear(s, &p, &g); });           \
+        normal);                                                              \
   }                                                                           \
                                                                               \
   VISRTX_CALLABLE void __direct_callable__ratioTrackTransmittance##Suffix(    \
@@ -157,14 +72,8 @@ VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFloat(
     SamplerStateBox<NvdbRectilinearSamplerState<ValueType>> stateBox;         \
     auto &samplerState = stateBox.state;                                      \
     initNvdbRectilinearSampler(samplerState, &field);                         \
-    detail::woodcockRatioTrackTransmittance(*ss,                              \
-        *hit,                                                                 \
-        samplerState,                                                         \
-        field,                                                                \
-        *attenuation,                                                         \
-        [] __device__(const NvdbRectilinearSamplerState<ValueType> &s,        \
-            const SpatialFieldGPUData &,                                      \
-            const vec3 &p) { return sampleNvdbRectilinear(s, &p, nullptr); });\
+    detail::woodcockRatioTrackTransmittance(                                  \
+        *ss, *hit, samplerState, field, *attenuation);                        \
   }                                                                           \
                                                                               \
   VISRTX_CALLABLE float __direct_callable__rayMarchVolume##Suffix(            \
@@ -187,14 +96,7 @@ VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFloat(
         color,                                                                \
         normal,                                                               \
         *opacity,                                                             \
-        invSamplingRate,                                                      \
-        [] __device__(const NvdbRectilinearSamplerState<ValueType> &s,        \
-            const SpatialFieldGPUData &,                                      \
-            const vec3 &p) { return sampleNvdbRectilinear(s, &p, nullptr); }, \
-        [] __device__(const NvdbRectilinearSamplerState<ValueType> &s,        \
-            const SpatialFieldGPUData &,                                      \
-            const vec3 &p,                                                    \
-            vec3 &g) { return sampleNvdbRectilinear(s, &p, &g); });           \
+        invSamplingRate);                                                     \
   }
 
 VISRTX_DEFINE_NVDB_RECT_WOODCOCK_CALLABLES(NvdbRectilinearFp4, nanovdb::Fp4)
diff --git a/devices/rtx/device/spatial_field/NvdbRegularSamplerInline.h b/devices/rtx/device/spatial_field/NvdbRegularSamplerInline.h
index de967b1dc..649aba3ec 100644
--- a/devices/rtx/device/spatial_field/NvdbRegularSamplerInline.h
+++ b/devices/rtx/device/spatial_field/NvdbRegularSamplerInline.h
@@ -32,10 +32,10 @@
 #pragma once
 
 // NanoVDB regular-grid sampler — inline device implementations.
-// Lives in a header so the volume integrator can call sampleNvdb directly,
-// bypassing the OptiX direct-callable dispatch on hot paths. The matching
-// __direct_callable__ entry points stay in NvdbRegularSampler_ptx.cu for
-// renderers that go through the SBT slot.
+// Lives in a header so the volume integrator and isosurface can call
+// sampleValue/sampleNormal directly, bypassing the OptiX direct-callable
+// dispatch on hot paths. The matching __direct_callable__ entry points stay in
+// NvdbRegularSampler_ptx.cu for renderers that go through the SBT slot.
 
 #include "gpu/gpu_decl.h"
 #include "gpu/gpu_objects.h"
@@ -106,64 +106,55 @@ VISRTX_DEVICE void initNvdbSampler(
   state.invTwoVoxelSize = nanovdb::Vec3f(iv.x, iv.y, iv.z);
 }
 
+// Object-space position -> index-space sample coordinate.
 template <typename ValueType>
-VISRTX_DEVICE float sampleNvdb(
-    const NvdbRegularSamplerState<ValueType> &state,
-    const vec3 *location,
-    vec3 *gradient)
+VISRTX_DEVICE nanovdb::Vec3f nvdbIndexPos(
+    const NvdbRegularSamplerState<ValueType> &state, const vec3 &p)
 {
-  const auto indexPos0 = state.grid->worldToIndexF(
-      nanovdb::Vec3f(location->x, location->y, location->z));
-
-  const auto indexPos =
-      (indexPos0 - state.offsetDown) * state.scale + state.offsetUp;
+  const auto indexPos0 =
+      state.grid->worldToIndexF(nanovdb::Vec3f(p.x, p.y, p.z));
+  return (indexPos0 - state.offsetDown) * state.scale + state.offsetUp;
+}
 
-  const auto clamped = clampNvdb(indexPos, state.indexMin, state.indexMax);
+// Filtered fetch at an index-space coordinate (clamped to the grid).
+template <typename ValueType>
+VISRTX_DEVICE float nvdbSampleAtIndex(
+    const NvdbRegularSamplerState<ValueType> &state, const nanovdb::Vec3f &idx)
+{
+  const auto c = clampNvdb(idx, state.indexMin, state.indexMax);
+  return state.filter == SpatialFieldFilter::Nearest ? state.nearestSampler(c)
+                                                      : state.linearSampler(c);
+}
 
-  if (state.filter == SpatialFieldFilter::Nearest) {
-    const float value = state.nearestSampler(clamped);
-    if (gradient) {
-      // Central differences at ±1 voxel in index space
-      const float sxp = state.nearestSampler(clampNvdb(
-          indexPos + nanovdb::Vec3f(1, 0, 0), state.indexMin, state.indexMax));
-      const float sxn = state.nearestSampler(clampNvdb(
-          indexPos - nanovdb::Vec3f(1, 0, 0), state.indexMin, state.indexMax));
-      const float syp = state.nearestSampler(clampNvdb(
-          indexPos + nanovdb::Vec3f(0, 1, 0), state.indexMin, state.indexMax));
-      const float syn = state.nearestSampler(clampNvdb(
-          indexPos - nanovdb::Vec3f(0, 1, 0), state.indexMin, state.indexMax));
-      const float szp = state.nearestSampler(clampNvdb(
-          indexPos + nanovdb::Vec3f(0, 0, 1), state.indexMin, state.indexMax));
-      const float szn = state.nearestSampler(clampNvdb(
-          indexPos - nanovdb::Vec3f(0, 0, 1), state.indexMin, state.indexMax));
-      // Convert from index space to object space
-      *gradient =
-          vec3((sxp - sxn) * state.scale[0] * state.invTwoVoxelSize[0],
-              (syp - syn) * state.scale[1] * state.invTwoVoxelSize[1],
-              (szp - szn) * state.scale[2] * state.invTwoVoxelSize[2]);
-    }
-    return value;
-  }
+// Shared per-sample API (see gpu/volumeIntegrationDetail.h). sampleValue
+// returns the field value; sampleNormal returns the unnormalized object-space
+// gradient (the raw normal direction) — callers orient and normalize. `field`
+// is unused for built-in fields (present for a uniform overload set).
+template <typename ValueType>
+VISRTX_DEVICE float sampleValue(const NvdbRegularSamplerState<ValueType> &state,
+    const SpatialFieldGPUData &,
+    const vec3 &p)
+{
+  return nvdbSampleAtIndex(state, nvdbIndexPos(state, p));
+}
 
-  const float value = state.linearSampler(clamped);
-  if (gradient) {
-    const float sxp = state.linearSampler(clampNvdb(
-        indexPos + nanovdb::Vec3f(1, 0, 0), state.indexMin, state.indexMax));
-    const float sxn = state.linearSampler(clampNvdb(
-        indexPos - nanovdb::Vec3f(1, 0, 0), state.indexMin, state.indexMax));
-    const float syp = state.linearSampler(clampNvdb(
-        indexPos + nanovdb::Vec3f(0, 1, 0), state.indexMin, state.indexMax));
-    const float syn = state.linearSampler(clampNvdb(
-        indexPos - nanovdb::Vec3f(0, 1, 0), state.indexMin, state.indexMax));
-    const float szp = state.linearSampler(clampNvdb(
-        indexPos + nanovdb::Vec3f(0, 0, 1), state.indexMin, state.indexMax));
-    const float szn = state.linearSampler(clampNvdb(
-        indexPos - nanovdb::Vec3f(0, 0, 1), state.indexMin, state.indexMax));
-    *gradient = vec3((sxp - sxn) * state.scale[0] * state.invTwoVoxelSize[0],
-        (syp - syn) * state.scale[1] * state.invTwoVoxelSize[1],
-        (szp - szn) * state.scale[2] * state.invTwoVoxelSize[2]);
-  }
-  return value;
+template <typename ValueType>
+VISRTX_DEVICE vec3 sampleNormal(
+    const NvdbRegularSamplerState<ValueType> &state,
+    const SpatialFieldGPUData &,
+    const vec3 &p)
+{
+  // Central differences at ±1 voxel in index space, mapped to object space.
+  const auto indexPos = nvdbIndexPos(state, p);
+  const float sxp = nvdbSampleAtIndex(state, indexPos + nanovdb::Vec3f(1, 0, 0));
+  const float sxn = nvdbSampleAtIndex(state, indexPos - nanovdb::Vec3f(1, 0, 0));
+  const float syp = nvdbSampleAtIndex(state, indexPos + nanovdb::Vec3f(0, 1, 0));
+  const float syn = nvdbSampleAtIndex(state, indexPos - nanovdb::Vec3f(0, 1, 0));
+  const float szp = nvdbSampleAtIndex(state, indexPos + nanovdb::Vec3f(0, 0, 1));
+  const float szn = nvdbSampleAtIndex(state, indexPos - nanovdb::Vec3f(0, 0, 1));
+  return vec3((sxp - sxn) * state.scale[0] * state.invTwoVoxelSize[0],
+      (syp - syn) * state.scale[1] * state.invTwoVoxelSize[1],
+      (szp - szn) * state.scale[2] * state.invTwoVoxelSize[2]);
 }
 
 } // namespace visrtx
diff --git a/devices/rtx/device/spatial_field/NvdbRegularSampler_ptx.cu b/devices/rtx/device/spatial_field/NvdbRegularSampler_ptx.cu
index ce0ee5037..d5738f79b 100644
--- a/devices/rtx/device/spatial_field/NvdbRegularSampler_ptx.cu
+++ b/devices/rtx/device/spatial_field/NvdbRegularSampler_ptx.cu
@@ -30,89 +30,21 @@
  */
 
 // OptiX direct-callable entry points for the NanoVDB regular-grid sampler.
-// The actual implementations live in NvdbRegularSamplerInline.h so the volume
-// integrator can call them inline on hot paths; this file only registers them
-// against the SBT slots for the fallback dispatch path.
+// Only the Woodcock-body callables are exposed via the SBT; value/normal/init
+// sampling stays inline (NvdbRegularSamplerInline.h) since the bodies below
+// resolve sampleValue/sampleNormal by ADL on the concrete state type, and the
+// volume integrator calls the inline sampleValue/sampleNormal directly on hot
+// paths.
 
 #include "NvdbRegularSamplerInline.h"
 #include "gpu/volumeIntegrationDetail.h"
 
 using namespace visrtx;
 
-// Fp4 sampler
-VISRTX_CALLABLE void __direct_callable__initNvdbSamplerFp4(
-    VolumeSamplingState *samplerState, const SpatialFieldGPUData *field)
-{
-  initNvdbSampler(samplerState->nvdbFp4, field);
-}
-
-VISRTX_CALLABLE float __direct_callable__sampleNvdbFp4(
-    const VolumeSamplingState *samplerState, const vec3 *location,
-    vec3 *gradient)
-{
-  return sampleNvdb(samplerState->nvdbFp4, location, gradient);
-}
-
-// Fp8 sampler
-VISRTX_CALLABLE void __direct_callable__initNvdbSamplerFp8(
-    VolumeSamplingState *samplerState, const SpatialFieldGPUData *field)
-{
-  initNvdbSampler(samplerState->nvdbFp8, field);
-}
-
-VISRTX_CALLABLE float __direct_callable__sampleNvdbFp8(
-    const VolumeSamplingState *samplerState, const vec3 *location,
-    vec3 *gradient)
-{
-  return sampleNvdb(samplerState->nvdbFp8, location, gradient);
-}
-
-// Fp16 sampler
-VISRTX_CALLABLE void __direct_callable__initNvdbSamplerFp16(
-    VolumeSamplingState *samplerState, const SpatialFieldGPUData *field)
-{
-  initNvdbSampler(samplerState->nvdbFp16, field);
-}
-
-VISRTX_CALLABLE float __direct_callable__sampleNvdbFp16(
-    const VolumeSamplingState *samplerState, const vec3 *location,
-    vec3 *gradient)
-{
-  return sampleNvdb(samplerState->nvdbFp16, location, gradient);
-}
-
-// FpN sampler
-VISRTX_CALLABLE void __direct_callable__initNvdbSamplerFpN(
-    VolumeSamplingState *samplerState, const SpatialFieldGPUData *field)
-{
-  initNvdbSampler(samplerState->nvdbFpN, field);
-}
-
-VISRTX_CALLABLE float __direct_callable__sampleNvdbFpN(
-    const VolumeSamplingState *samplerState, const vec3 *location,
-    vec3 *gradient)
-{
-  return sampleNvdb(samplerState->nvdbFpN, location, gradient);
-}
-
-// Float sampler
-VISRTX_CALLABLE void __direct_callable__initNvdbSamplerFloat(
-    VolumeSamplingState *samplerState, const SpatialFieldGPUData *field)
-{
-  initNvdbSampler(samplerState->nvdbFloat, field);
-}
-
-VISRTX_CALLABLE float __direct_callable__sampleNvdbFloat(
-    const VolumeSamplingState *samplerState, const vec3 *location,
-    vec3 *gradient)
-{
-  return sampleNvdb(samplerState->nvdbFloat, location, gradient);
-}
-
 // Woodcock-body callables — one per variant. Each stack-allocates the typed
 // sampler state, inits it once, then runs the shared body from
-// volumeIntegrationDetail.h with __device__ lambdas resolving to the
-// variant's inline sampleNvdb<T>.
+// volumeIntegrationDetail.h, which calls the variant's inline
+// sampleValue/sampleNormal.
 #define VISRTX_DEFINE_NVDB_WOODCOCK_CALLABLES(Suffix, ValueType)              \
   VISRTX_CALLABLE float __direct_callable__sampleDistance##Suffix(            \
       ScreenSample *ss,                                                       \
@@ -134,14 +66,7 @@ VISRTX_CALLABLE float __direct_callable__sampleNvdbFloat(
         *albedo,                                                              \
         *extinction,                                                          \
         *didScatter,                                                          \
-        normal,                                                               \
-        [] __device__(const NvdbRegularSamplerState<ValueType> &s,            \
-            const SpatialFieldGPUData &,                                      \
-            const vec3 &p) { return sampleNvdb(s, &p, nullptr); },            \
-        [] __device__(const NvdbRegularSamplerState<ValueType> &s,            \
-            const SpatialFieldGPUData &,                                      \
-            const vec3 &p,                                                    \
-            vec3 &g) { return sampleNvdb(s, &p, &g); });                      \
+        normal);                                                              \
   }                                                                           \
                                                                               \
   VISRTX_CALLABLE void __direct_callable__ratioTrackTransmittance##Suffix(    \
@@ -152,14 +77,8 @@ VISRTX_CALLABLE float __direct_callable__sampleNvdbFloat(
     SamplerStateBox<NvdbRegularSamplerState<ValueType>> stateBox;             \
     auto &samplerState = stateBox.state;                                      \
     initNvdbSampler(samplerState, &field);                                    \
-    detail::woodcockRatioTrackTransmittance(*ss,                              \
-        *hit,                                                                 \
-        samplerState,                                                         \
-        field,                                                                \
-        *attenuation,                                                         \
-        [] __device__(const NvdbRegularSamplerState<ValueType> &s,            \
-            const SpatialFieldGPUData &,                                      \
-            const vec3 &p) { return sampleNvdb(s, &p, nullptr); });           \
+    detail::woodcockRatioTrackTransmittance(                                  \
+        *ss, *hit, samplerState, field, *attenuation);                        \
   }                                                                           \
                                                                               \
   VISRTX_CALLABLE float __direct_callable__rayMarchVolume##Suffix(            \
@@ -182,14 +101,7 @@ VISRTX_CALLABLE float __direct_callable__sampleNvdbFloat(
         color,                                                                \
         normal,                                                               \
         *opacity,                                                             \
-        invSamplingRate,                                                      \
-        [] __device__(const NvdbRegularSamplerState<ValueType> &s,            \
-            const SpatialFieldGPUData &,                                      \
-            const vec3 &p) { return sampleNvdb(s, &p, nullptr); },            \
-        [] __device__(const NvdbRegularSamplerState<ValueType> &s,            \
-            const SpatialFieldGPUData &,                                      \
-            const vec3 &p,                                                    \
-            vec3 &g) { return sampleNvdb(s, &p, &g); });                      \
+        invSamplingRate);                                                     \
   }
 
 VISRTX_DEFINE_NVDB_WOODCOCK_CALLABLES(NvdbFp4, nanovdb::Fp4)
diff --git a/devices/rtx/device/spatial_field/StructuredRectilinearSamplerInline.h b/devices/rtx/device/spatial_field/StructuredRectilinearSamplerInline.h
index 6c65dd743..8b478e229 100644
--- a/devices/rtx/device/spatial_field/StructuredRectilinearSamplerInline.h
+++ b/devices/rtx/device/spatial_field/StructuredRectilinearSamplerInline.h
@@ -59,41 +59,52 @@ VISRTX_DEVICE void initStructuredRectilinearSampler(
       data.cellCentered ? (state.dims + vec3(1)) / extent : state.dims / extent;
 }
 
-VISRTX_DEVICE float sampleStructuredRectilinear(
-    const StructuredRectilinearSamplerState &state,
-    const vec3 *location,
-    vec3 *gradient)
+// Maps an object-space position to texture sample coordinates through the
+// per-axis warp LUTs.
+VISRTX_DEVICE vec3 structuredRectilinearCoord(
+    const StructuredRectilinearSamplerState &state, const vec3 &p)
 {
-  vec3 normalizedPos = (*location - state.axisBoundsMin)
-      / (state.axisBoundsMax - state.axisBoundsMin);
+  vec3 normalizedPos =
+      (p - state.axisBoundsMin) / (state.axisBoundsMax - state.axisBoundsMin);
   normalizedPos = vec3(tex1D<float>(state.axisLUT[0], normalizedPos.x),
       tex1D<float>(state.axisLUT[1], normalizedPos.y),
       tex1D<float>(state.axisLUT[2], normalizedPos.z));
-  const auto sampleCoord = normalizedPos * state.dims + state.offset;
-
-  const float value =
-      tex3D<float>(state.texObj, sampleCoord.x, sampleCoord.y, sampleCoord.z);
+  return normalizedPos * state.dims + state.offset;
+}
 
-  if (gradient) {
-    const auto px = sampleCoord + vec3(1, 0, 0);
-    const auto nx = sampleCoord - vec3(1, 0, 0);
-    const auto py = sampleCoord + vec3(0, 1, 0);
-    const auto ny = sampleCoord - vec3(0, 1, 0);
-    const auto pz = sampleCoord + vec3(0, 0, 1);
-    const auto nz = sampleCoord - vec3(0, 0, 1);
+// Shared per-sample API (see gpu/volumeIntegrationDetail.h). sampleValue
+// returns the field value; sampleNormal returns the unnormalized object-space
+// gradient (the raw normal direction) — callers orient and normalize. `field`
+// is unused for built-in fields (present for a uniform overload set).
+VISRTX_DEVICE float sampleValue(const StructuredRectilinearSamplerState &state,
+    const SpatialFieldGPUData &,
+    const vec3 &p)
+{
+  const auto c = structuredRectilinearCoord(state, p);
+  return tex3D<float>(state.texObj, c.x, c.y, c.z);
+}
 
-    const float sxp = tex3D<float>(state.texObj, px.x, px.y, px.z);
-    const float sxn = tex3D<float>(state.texObj, nx.x, nx.y, nx.z);
-    const float syp = tex3D<float>(state.texObj, py.x, py.y, py.z);
-    const float syn = tex3D<float>(state.texObj, ny.x, ny.y, ny.z);
-    const float szp = tex3D<float>(state.texObj, pz.x, pz.y, pz.z);
-    const float szn = tex3D<float>(state.texObj, nz.x, nz.y, nz.z);
+VISRTX_DEVICE vec3 sampleNormal(const StructuredRectilinearSamplerState &state,
+    const SpatialFieldGPUData &,
+    const vec3 &p)
+{
+  const auto sampleCoord = structuredRectilinearCoord(state, p);
+  const auto px = sampleCoord + vec3(1, 0, 0);
+  const auto nx = sampleCoord - vec3(1, 0, 0);
+  const auto py = sampleCoord + vec3(0, 1, 0);
+  const auto ny = sampleCoord - vec3(0, 1, 0);
+  const auto pz = sampleCoord + vec3(0, 0, 1);
+  const auto nz = sampleCoord - vec3(0, 0, 1);
 
-    *gradient =
-        vec3(sxp - sxn, syp - syn, szp - szn) * state.invAvgVoxelSpacing * 0.5f;
-  }
+  const float sxp = tex3D<float>(state.texObj, px.x, px.y, px.z);
+  const float sxn = tex3D<float>(state.texObj, nx.x, nx.y, nx.z);
+  const float syp = tex3D<float>(state.texObj, py.x, py.y, py.z);
+  const float syn = tex3D<float>(state.texObj, ny.x, ny.y, ny.z);
+  const float szp = tex3D<float>(state.texObj, pz.x, pz.y, pz.z);
+  const float szn = tex3D<float>(state.texObj, nz.x, nz.y, nz.z);
 
-  return value;
+  return vec3(sxp - sxn, syp - syn, szp - szn) * state.invAvgVoxelSpacing
+      * 0.5f;
 }
 
 } // namespace visrtx
diff --git a/devices/rtx/device/spatial_field/StructuredRectilinearSampler_ptx.cu b/devices/rtx/device/spatial_field/StructuredRectilinearSampler_ptx.cu
index ef280154d..4e011350b 100644
--- a/devices/rtx/device/spatial_field/StructuredRectilinearSampler_ptx.cu
+++ b/devices/rtx/device/spatial_field/StructuredRectilinearSampler_ptx.cu
@@ -30,29 +30,15 @@
  */
 
 // OptiX direct-callable entry points for the structured rectilinear sampler.
-// Implementations live in StructuredRectilinearSamplerInline.h.
+// Only the Woodcock-body callables are exposed via the SBT; value/normal/init
+// sampling stays inline (StructuredRectilinearSamplerInline.h) since the bodies
+// below resolve sampleValue/sampleNormal by ADL on the concrete state type.
 
 #include "StructuredRectilinearSamplerInline.h"
 #include "gpu/volumeIntegrationDetail.h"
 
 using namespace visrtx;
 
-VISRTX_CALLABLE void __direct_callable__initStructuredRectilinearSampler(
-    VolumeSamplingState *samplerState, const SpatialFieldGPUData *field)
-{
-  initStructuredRectilinearSampler(
-      samplerState->structuredRectilinear, field);
-}
-
-VISRTX_CALLABLE float __direct_callable__sampleStructuredRectilinear(
-    const VolumeSamplingState *samplerState,
-    const vec3 *location,
-    vec3 *gradient)
-{
-  return sampleStructuredRectilinear(
-      samplerState->structuredRectilinear, location, gradient);
-}
-
 VISRTX_CALLABLE float
 __direct_callable__sampleDistanceStructuredRectilinear(ScreenSample *ss,
     const VolumeHit *hit,
@@ -73,16 +59,7 @@ __direct_callable__sampleDistanceStructuredRectilinear(ScreenSample *ss,
       *albedo,
       *extinction,
       *didScatter,
-      normal,
-      [] __device__(const StructuredRectilinearSamplerState &s,
-          const SpatialFieldGPUData &,
-          const vec3 &p) {
-        return sampleStructuredRectilinear(s, &p, nullptr);
-      },
-      [] __device__(const StructuredRectilinearSamplerState &s,
-          const SpatialFieldGPUData &,
-          const vec3 &p,
-          vec3 &g) { return sampleStructuredRectilinear(s, &p, &g); });
+      normal);
 }
 
 VISRTX_CALLABLE void
@@ -94,16 +71,8 @@ __direct_callable__ratioTrackTransmittanceStructuredRectilinear(
   SamplerStateBox<StructuredRectilinearSamplerState> stateBox;
   auto &samplerState = stateBox.state;
   initStructuredRectilinearSampler(samplerState, &field);
-  detail::woodcockRatioTrackTransmittance(*ss,
-      *hit,
-      samplerState,
-      field,
-      *attenuation,
-      [] __device__(const StructuredRectilinearSamplerState &s,
-          const SpatialFieldGPUData &,
-          const vec3 &p) {
-        return sampleStructuredRectilinear(s, &p, nullptr);
-      });
+  detail::woodcockRatioTrackTransmittance(
+      *ss, *hit, samplerState, field, *attenuation);
 }
 
 VISRTX_CALLABLE float __direct_callable__rayMarchVolumeStructuredRectilinear(
@@ -119,21 +88,6 @@ VISRTX_CALLABLE float __direct_callable__rayMarchVolumeStructuredRectilinear(
   SamplerStateBox<StructuredRectilinearSamplerState> stateBox;
   auto &samplerState = stateBox.state;
   initStructuredRectilinearSampler(samplerState, &field);
-  return detail::latticeRayMarchVolume(*ss,
-      *hit,
-      samplerState,
-      field,
-      color,
-      normal,
-      *opacity,
-      invSamplingRate,
-      [] __device__(const StructuredRectilinearSamplerState &s,
-          const SpatialFieldGPUData &,
-          const vec3 &p) {
-        return sampleStructuredRectilinear(s, &p, nullptr);
-      },
-      [] __device__(const StructuredRectilinearSamplerState &s,
-          const SpatialFieldGPUData &,
-          const vec3 &p,
-          vec3 &g) { return sampleStructuredRectilinear(s, &p, &g); });
+  return detail::latticeRayMarchVolume(
+      *ss, *hit, samplerState, field, color, normal, *opacity, invSamplingRate);
 }
diff --git a/devices/rtx/device/spatial_field/StructuredRegularSamplerInline.h b/devices/rtx/device/spatial_field/StructuredRegularSamplerInline.h
index b59e76d8c..f629fac89 100644
--- a/devices/rtx/device/spatial_field/StructuredRegularSamplerInline.h
+++ b/devices/rtx/device/spatial_field/StructuredRegularSamplerInline.h
@@ -50,34 +50,38 @@ VISRTX_DEVICE void initStructuredRegularSampler(
       vec3(field->data.structuredRegular.cellCentered ? 0.0f : 0.5f);
 }
 
-VISRTX_DEVICE float sampleStructuredRegular(
-    const StructuredRegularSamplerState &state,
-    const vec3 *location,
-    vec3 *gradient)
+// Shared per-sample API (see gpu/volumeIntegrationDetail.h). sampleValue
+// returns the field value; sampleNormal returns the unnormalized object-space
+// gradient (the raw normal direction) — callers orient and normalize. `field`
+// is unused for built-in fields (present for a uniform overload set).
+VISRTX_DEVICE float sampleValue(const StructuredRegularSamplerState &state,
+    const SpatialFieldGPUData &,
+    const vec3 &p)
 {
-  const auto texelCoords = (*location - state.origin) * state.invSpacing;
-  const auto coords = texelCoords + state.offset;
-  const float value = tex3D<float>(state.texObj, coords.x, coords.y, coords.z);
-
-  if (gradient) {
-    const auto px = coords + vec3(1, 0, 0);
-    const auto nx = coords - vec3(1, 0, 0);
-    const auto py = coords + vec3(0, 1, 0);
-    const auto ny = coords - vec3(0, 1, 0);
-    const auto pz = coords + vec3(0, 0, 1);
-    const auto nz = coords - vec3(0, 0, 1);
+  const auto coords = (p - state.origin) * state.invSpacing + state.offset;
+  return tex3D<float>(state.texObj, coords.x, coords.y, coords.z);
+}
 
-    const float sxp = tex3D<float>(state.texObj, px.x, px.y, px.z);
-    const float sxn = tex3D<float>(state.texObj, nx.x, nx.y, nx.z);
-    const float syp = tex3D<float>(state.texObj, py.x, py.y, py.z);
-    const float syn = tex3D<float>(state.texObj, ny.x, ny.y, ny.z);
-    const float szp = tex3D<float>(state.texObj, pz.x, pz.y, pz.z);
-    const float szn = tex3D<float>(state.texObj, nz.x, nz.y, nz.z);
+VISRTX_DEVICE vec3 sampleNormal(const StructuredRegularSamplerState &state,
+    const SpatialFieldGPUData &,
+    const vec3 &p)
+{
+  const auto coords = (p - state.origin) * state.invSpacing + state.offset;
+  const auto px = coords + vec3(1, 0, 0);
+  const auto nx = coords - vec3(1, 0, 0);
+  const auto py = coords + vec3(0, 1, 0);
+  const auto ny = coords - vec3(0, 1, 0);
+  const auto pz = coords + vec3(0, 0, 1);
+  const auto nz = coords - vec3(0, 0, 1);
 
-    *gradient = vec3(sxp - sxn, syp - syn, szp - szn) * state.invSpacing * 0.5f;
-  }
+  const float sxp = tex3D<float>(state.texObj, px.x, px.y, px.z);
+  const float sxn = tex3D<float>(state.texObj, nx.x, nx.y, nx.z);
+  const float syp = tex3D<float>(state.texObj, py.x, py.y, py.z);
+  const float syn = tex3D<float>(state.texObj, ny.x, ny.y, ny.z);
+  const float szp = tex3D<float>(state.texObj, pz.x, pz.y, pz.z);
+  const float szn = tex3D<float>(state.texObj, nz.x, nz.y, nz.z);
 
-  return value;
+  return vec3(sxp - sxn, syp - syn, szp - szn) * state.invSpacing * 0.5f;
 }
 
 } // namespace visrtx
diff --git a/devices/rtx/device/spatial_field/StructuredRegularSampler_ptx.cu b/devices/rtx/device/spatial_field/StructuredRegularSampler_ptx.cu
index 07aedc3e1..7072ca9a3 100644
--- a/devices/rtx/device/spatial_field/StructuredRegularSampler_ptx.cu
+++ b/devices/rtx/device/spatial_field/StructuredRegularSampler_ptx.cu
@@ -30,28 +30,15 @@
  */
 
 // OptiX direct-callable entry points for the structured regular sampler.
-// Implementations live in StructuredRegularSamplerInline.h.
+// Only the Woodcock-body callables are exposed via the SBT; value/normal/init
+// sampling stays inline (StructuredRegularSamplerInline.h) since the bodies
+// below resolve sampleValue/sampleNormal by ADL on the concrete state type.
 
 #include "StructuredRegularSamplerInline.h"
 #include "gpu/volumeIntegrationDetail.h"
 
 using namespace visrtx;
 
-VISRTX_CALLABLE void __direct_callable__initStructuredRegularSampler(
-    VolumeSamplingState *samplerState, const SpatialFieldGPUData *field)
-{
-  initStructuredRegularSampler(samplerState->structuredRegular, field);
-}
-
-VISRTX_CALLABLE float __direct_callable__sampleStructuredRegular(
-    const VolumeSamplingState *samplerState,
-    const vec3 *location,
-    vec3 *gradient)
-{
-  return sampleStructuredRegular(
-      samplerState->structuredRegular, location, gradient);
-}
-
 // Woodcock-body callables — single variant, no macro fan-out.
 VISRTX_CALLABLE float __direct_callable__sampleDistanceStructuredRegular(
     ScreenSample *ss,
@@ -73,14 +60,7 @@ VISRTX_CALLABLE float __direct_callable__sampleDistanceStructuredRegular(
       *albedo,
       *extinction,
       *didScatter,
-      normal,
-      [] __device__(const StructuredRegularSamplerState &s,
-          const SpatialFieldGPUData &,
-          const vec3 &p) { return sampleStructuredRegular(s, &p, nullptr); },
-      [] __device__(const StructuredRegularSamplerState &s,
-          const SpatialFieldGPUData &,
-          const vec3 &p,
-          vec3 &g) { return sampleStructuredRegular(s, &p, &g); });
+      normal);
 }
 
 VISRTX_CALLABLE void
@@ -92,14 +72,8 @@ __direct_callable__ratioTrackTransmittanceStructuredRegular(
   SamplerStateBox<StructuredRegularSamplerState> stateBox;
   auto &samplerState = stateBox.state;
   initStructuredRegularSampler(samplerState, &field);
-  detail::woodcockRatioTrackTransmittance(*ss,
-      *hit,
-      samplerState,
-      field,
-      *attenuation,
-      [] __device__(const StructuredRegularSamplerState &s,
-          const SpatialFieldGPUData &,
-          const vec3 &p) { return sampleStructuredRegular(s, &p, nullptr); });
+  detail::woodcockRatioTrackTransmittance(
+      *ss, *hit, samplerState, field, *attenuation);
 }
 
 VISRTX_CALLABLE float __direct_callable__rayMarchVolumeStructuredRegular(
@@ -115,19 +89,6 @@ VISRTX_CALLABLE float __direct_callable__rayMarchVolumeStructuredRegular(
   SamplerStateBox<StructuredRegularSamplerState> stateBox;
   auto &samplerState = stateBox.state;
   initStructuredRegularSampler(samplerState, &field);
-  return detail::latticeRayMarchVolume(*ss,
-      *hit,
-      samplerState,
-      field,
-      color,
-      normal,
-      *opacity,
-      invSamplingRate,
-      [] __device__(const StructuredRegularSamplerState &s,
-          const SpatialFieldGPUData &,
-          const vec3 &p) { return sampleStructuredRegular(s, &p, nullptr); },
-      [] __device__(const StructuredRegularSamplerState &s,
-          const SpatialFieldGPUData &,
-          const vec3 &p,
-          vec3 &g) { return sampleStructuredRegular(s, &p, &g); });
+  return detail::latticeRayMarchVolume(
+      *ss, *hit, samplerState, field, color, normal, *opacity, invSamplingRate);
 }
diff --git a/devices/rtx/device/spatial_field/space_skipping/UniformGrid.cu b/devices/rtx/device/spatial_field/space_skipping/UniformGrid.cu
index 4814d90c5..f616b53b2 100644
--- a/devices/rtx/device/spatial_field/space_skipping/UniformGrid.cu
+++ b/devices/rtx/device/spatial_field/space_skipping/UniformGrid.cu
@@ -38,6 +38,14 @@
 
 namespace visrtx {
 
+// Defined in UniformGridCustom.cu (a separate TU so it can include the custom
+// sampler dispatch header). Default stream; the cudaFree below synchronizes.
+void launchCustomValueRanges(box1 *valueRanges,
+    ivec3 mcDims,
+    box3 objectBounds,
+    const SpatialFieldGPUData *dSfgd,
+    cudaStream_t stream);
+
 __global__ void computeOpacityBoundsGPU(float2 *opacityBounds,
     const box1 *valueRanges,
     cudaTextureObject_t colorMap,
@@ -210,6 +218,10 @@ void UniformGrid::computeValueRanges(const SpatialFieldGPUData &sfgd)
   case SbtCallableEntryPoints::SpatialFieldSamplerNvdbRectilinearFloat:
     LAUNCH_BUILD_GRID(NvdbRectilinearSpatialFieldAccessor<float>);
     break;
+  case SbtCallableEntryPoints::SpatialFieldSamplerCustom:
+    launchCustomValueRanges(
+        m_valueRanges, m_dims, m_objectBounds, sfgdDevice, /*stream=*/0);
+    break;
   default:
     break;
   }
diff --git a/devices/rtx/device/spatial_field/space_skipping/UniformGridCustom.cu b/devices/rtx/device/spatial_field/space_skipping/UniformGridCustom.cu
new file mode 100644
index 000000000..94f443ef5
--- /dev/null
+++ b/devices/rtx/device/spatial_field/space_skipping/UniformGridCustom.cu
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+// Per-macrocell value-range grid for custom spatial fields. A plain CUDA
+// kernel cannot optixDirectCall the custom SBT sampler, but the user sample
+// function is an inline __device__ function reachable from any TU that includes
+// the dispatch header — so this TU re-expands VISRTX_CUSTOM_SAMPLE_DISPATCH
+// directly. Compiled into the library unconditionally; only active when the
+// custom-field provider's CMake defines VISRTX_CUSTOM_SAMPLERS_HEADER /
+// VISRTX_CUSTOM_FIELD_DATA_HEADER on the library target (else a stub).
+
+#include <cuda_runtime_api.h>
+#include <limits>
+#include "UniformGrid.h"
+#include "gpu/gpu_decl.h"
+#include "gpu/gpu_math.h"
+#include "gpu/gpu_objects.h"
+
+#ifdef VISRTX_CUSTOM_FIELD_DATA_HEADER
+#include VISRTX_CUSTOM_FIELD_DATA_HEADER
+#endif
+#ifdef VISRTX_CUSTOM_SAMPLERS_HEADER
+#include VISRTX_CUSTOM_SAMPLERS_HEADER
+#endif
+
+namespace visrtx {
+
+#ifdef VISRTX_CUSTOM_SAMPLERS_HEADER
+
+namespace {
+
+constexpr int CUSTOM_VALUE_RANGE_SUPERSAMPLE = 4; // S; S^3 samples per cell (fallback)
+
+// Only the no-dispatch supersample fallback (the kernel's #else below) samples
+// the field directly; guard the helper to that case so it isn't compiled
+// (and flagged unreferenced) when a value-range dispatch is provided.
+#if !defined(VISRTX_CUSTOM_VALUE_RANGE_DISPATCH)                                \
+    && !defined(VISRTX_CUSTOM_GLOBAL_VALUE_RANGE_DISPATCH)
+VISRTX_DEVICE float sampleCustomValue(
+    const CustomFieldData &data, const vec3 &P)
+{
+#ifdef VISRTX_CUSTOM_SAMPLE_DISPATCH
+  VISRTX_CUSTOM_SAMPLE_DISPATCH(data, P)
+#else
+  return 0.0f;
+#endif
+}
+#endif
+
+#ifdef VISRTX_CUSTOM_VALUE_RANGE_DISPATCH
+// Field-supplied conservative value interval over an object-space AABB. Both
+// ends are real bounds; the volume cannot vanish or leak.
+VISRTX_DEVICE box1 customCellValueRange(
+    const CustomFieldData &data, const vec3 &boxLo, const vec3 &boxHi)
+{
+  VISRTX_CUSTOM_VALUE_RANGE_DISPATCH(data, boxLo, boxHi)
+}
+#elif defined(VISRTX_CUSTOM_GLOBAL_VALUE_RANGE_DISPATCH)
+// Field-supplied conservative interval over the whole domain. Constant per cell
+// (no space skipping) but never wrong.
+VISRTX_DEVICE box1 customGlobalValueRange(const CustomFieldData &data)
+{
+  VISRTX_CUSTOM_GLOBAL_VALUE_RANGE_DISPATCH(data)
+}
+#endif
+
+// Per macrocell, emit a conservative value interval over the cell's object-space
+// AABB. Single pass — no lower-bound fabrication needed.
+__global__ void customCellRangeGPU(box1 *valueRanges,
+    ivec3 mcDims,
+    box3 objectBounds,
+    const SpatialFieldGPUData *sfgd,
+    int S)
+{
+  size_t threadID = blockIdx.x * size_t(blockDim.x) + threadIdx.x;
+  size_t numMCs = size_t(mcDims.x) * mcDims.y * mcDims.z;
+  if (threadID >= numMCs)
+    return;
+
+  ivec3 mcID(threadID % mcDims.x,
+      threadID / mcDims.x % mcDims.y,
+      threadID / (size_t(mcDims.x) * mcDims.y));
+
+  const vec3 ext = objectBounds.upper - objectBounds.lower;
+  const vec3 normLo = vec3(mcID) / vec3(mcDims);
+  const vec3 normHi = vec3(mcID + ivec3(1)) / vec3(mcDims);
+  const vec3 objLo = objectBounds.lower + normLo * ext;
+  const vec3 objHi = objectBounds.lower + normHi * ext;
+
+  const CustomFieldData &data = sfgd->data.custom;
+
+#ifdef VISRTX_CUSTOM_VALUE_RANGE_DISPATCH
+  (void)S;
+  valueRanges[threadID] = customCellValueRange(data, objLo, objHi);
+#elif defined(VISRTX_CUSTOM_GLOBAL_VALUE_RANGE_DISPATCH)
+  (void)S;
+  valueRanges[threadID] = customGlobalValueRange(data);
+#else
+#warning \
+    "Custom field defines samplers but no value-range dispatch; macrocell " \
+    "bounds are best-effort point-supersampled and MAY mis-bound the true " \
+    "field (volume can render too transparent / dark / vanish). Define " \
+    "VISRTX_CUSTOM_VALUE_RANGE_DISPATCH (tight per-AABB) or " \
+    "VISRTX_CUSTOM_GLOBAL_VALUE_RANGE_DISPATCH (conservative constant) for a " \
+    "correctness guarantee."
+  // Best-effort: point-supersample real lo AND hi (no longer clamped to 0).
+  const vec3 cellExt = objHi - objLo;
+  float lo = std::numeric_limits<float>::infinity();
+  float hi = -std::numeric_limits<float>::infinity();
+  for (int iz = 0; iz < S; ++iz)
+    for (int iy = 0; iy < S; ++iy)
+      for (int ix = 0; ix < S; ++ix) {
+        const vec3 frac((ix + 0.5f) / S, (iy + 0.5f) / S, (iz + 0.5f) / S);
+        const float v = sampleCustomValue(data, objLo + frac * cellExt);
+        if (!isnan(v) && !isinf(v)) {
+          lo = fminf(lo, v);
+          hi = fmaxf(hi, v);
+        }
+      }
+  valueRanges[threadID] = (lo <= hi)
+      ? box1{lo, hi}
+      : box1{std::numeric_limits<float>::infinity(),
+             -std::numeric_limits<float>::infinity()};
+#endif
+}
+
+} // namespace
+
+void launchCustomValueRanges(box1 *valueRanges,
+    ivec3 mcDims,
+    box3 objectBounds,
+    const SpatialFieldGPUData *dSfgd,
+    cudaStream_t stream)
+{
+  size_t numMCs = size_t(mcDims.x) * mcDims.y * mcDims.z;
+  if (numMCs == 0)
+    return;
+  const int threads = 256;
+  const int blocks = int(iDivUp(int64_t(numMCs), threads));
+  customCellRangeGPU<<<blocks, threads, 0, stream>>>(
+      valueRanges, mcDims, objectBounds, dSfgd, CUSTOM_VALUE_RANGE_SUPERSAMPLE);
+}
+
+#else // no custom field configured: write empty sentinel everywhere
+
+namespace {
+__global__ void customEmptyGPU(box1 *valueRanges, size_t numMCs)
+{
+  size_t threadID = blockIdx.x * size_t(blockDim.x) + threadIdx.x;
+  if (threadID >= numMCs)
+    return;
+  valueRanges[threadID].lower = std::numeric_limits<float>::infinity();
+  valueRanges[threadID].upper = -std::numeric_limits<float>::infinity();
+}
+} // namespace
+
+void launchCustomValueRanges(box1 *valueRanges,
+    ivec3 mcDims,
+    box3 /*objectBounds*/,
+    const SpatialFieldGPUData * /*dSfgd*/,
+    cudaStream_t stream)
+{
+  size_t numMCs = size_t(mcDims.x) * mcDims.y * mcDims.z;
+  if (numMCs == 0)
+    return;
+  const int threads = 256;
+  const int blocks = int(iDivUp(int64_t(numMCs), threads));
+  customEmptyGPU<<<blocks, threads, 0, stream>>>(valueRanges, numMCs);
+}
+
+#endif
+
+} // namespace visrtx
diff --git a/tsd/apps/interactive/demos/customField/CMakeLists.txt b/tsd/apps/interactive/demos/customField/CMakeLists.txt
index c0aaf35a0..383647f54 100644
--- a/tsd/apps/interactive/demos/customField/CMakeLists.txt
+++ b/tsd/apps/interactive/demos/customField/CMakeLists.txt
@@ -38,6 +38,13 @@ if (TARGET CustomFieldSampler_ptx)
   )
 endif()
 
+## UniformGridCustom.cu (core library source) needs the same dispatch headers
+## to build per-cell majorants for the custom field.
+target_compile_definitions(anari_library_visrtx PRIVATE
+  VISRTX_CUSTOM_FIELD_DATA_HEADER="WeightedPointsFieldData.h"
+  VISRTX_CUSTOM_SAMPLERS_HEADER="WeightedPointsFieldDispatch.h"
+)
+
 ## ---------------------------------------------------------------------------
 ## 2. Build the demo application
 ## ---------------------------------------------------------------------------
diff --git a/tsd/apps/interactive/demos/customField/README.md b/tsd/apps/interactive/demos/customField/README.md
index 1cf27c5b3..1e916c582 100644
--- a/tsd/apps/interactive/demos/customField/README.md
+++ b/tsd/apps/interactive/demos/customField/README.md
@@ -20,17 +20,35 @@ build time:
    handles parameter parsing (`commitParameters`), host-to-device data transfer
    (`finalize`), and spatial metadata (`bounds`, `stepSize`).
 
-3. **Write the GPU sampler**: A `__device__` function that evaluates the field
-   at an arbitrary 3D point, dispatched via the
+3. **Write the GPU sampler**: A `__host__ __device__` function that evaluates
+   the field at an arbitrary 3D point, dispatched via the
    `VISRTX_CUSTOM_SAMPLE_DISPATCH` macro.
 
-4. **Register at static init**: A small registration file calls
+4. **Supply a conservative value range** (optional but recommended): A function
+   returning a `{lo, hi}` interval that bounds the field value, used by VisRTX
+   to build a per-macrocell space-skipping grid for delta tracking. Two hooks
+   are available — define exactly one:
+
+   | Macro | Signature | Trade-off |
+   |-------|-----------|-----------|
+   | `VISRTX_CUSTOM_VALUE_RANGE_DISPATCH(data, boxLo, boxHi)` | `box1` over an object-space AABB | Tight per-cell bounds → best space skipping |
+   | `VISRTX_CUSTOM_GLOBAL_VALUE_RANGE_DISPATCH(data)` | `box1` over the whole domain | Constant bound, zero extra cost → no space skipping |
+
+   Without either hook the engine falls back to point-supersampling each cell,
+   which can mis-bound the field (the volume may render too dark or vanish). See
+   *Space skipping & majorants* below.
+
+5. **Register at static init**: A small registration file calls
    `visrtx::registerCustomField("subtypeName", factory)`, which inserts the
    type into the `SpatialFieldRegistry`. The ANARI device discovers it at
    runtime when `anariNewSpatialField(device, "subtypeName")` is called.
 
-All four pieces are compiled into `libanari_library_visrtx.so` via CMake
-`target_sources`, keeping the core VisRTX codebase unchanged.
+The sample and value-range macros, plus their `__device__` helpers, live in a
+single dispatch header (`WeightedPointsFieldDispatch.h`) that CMake wires into
+the core library via the `VISRTX_CUSTOM_FIELD_DATA_HEADER` and
+`VISRTX_CUSTOM_SAMPLERS_HEADER` compile definitions. All pieces are compiled
+into `libanari_library_visrtx.so` via CMake `target_sources`, keeping the core
+VisRTX codebase unchanged.
 
 ## The Weighted Points Field
 
@@ -62,6 +80,27 @@ This gives O(log N) sampling cost with controllable quality via two parameters:
 | `sigma` | Gaussian kernel width (Å). Controls how "blobby" each atom appears. Auto-computed from median nearest-neighbor distance. |
 | `cutoff` | LOD distance threshold (Å). Nodes farther than this from the sample point are approximated by their aggregate. Auto-computed from domain diagonal. |
 
+### Space skipping & majorants
+
+VisRTX renders volumes with delta tracking, which needs a conservative `{lo,
+hi}` value interval over each region it steps through. For custom fields this
+comes from one of the two value-range hooks described in step 4:
+
+- **`VISRTX_CUSTOM_VALUE_RANGE_DISPATCH`** — per-AABB interval, enables tight
+  space skipping. Used by this demo via `ValueRangeWeightedPoints.cuh`.
+- **`VISRTX_CUSTOM_GLOBAL_VALUE_RANGE_DISPATCH`** — single domain-wide interval,
+  simpler to implement but provides no per-cell empty-space culling.
+
+The weighted points field implements the per-AABB variant by summing, over *all*
+octree nodes, each node's largest Gaussian contribution within the AABB
+(evaluated at the box point closest to the node center).
+
+This is provably conservative — the sampler's value at any point is a sum over
+an LOD cut, which is a *subset* of all nodes, so the all-nodes sum can only
+over-estimate. The result is a per-macrocell `{0, hi}` grid that lets the
+renderer skip empty space without ever under-bounding the field. See
+`fields/samplers/ValueRangeWeightedPoints.cuh` for the derivation.
+
 ### GPU data layout
 
 The octree is serialized into two flat arrays for GPU consumption:
@@ -71,6 +110,10 @@ The octree is serialized into two flat arrays for GPU consumption:
 - **indices** (`int32×2 per node`): `[childBegin, childEnd)`: index range of
   children in the values/indices arrays. Leaves have `(0, 0)`.
 
+The `WeightedPointsFieldData` struct also carries the precomputed `1/(2σ²)`
+factor and the conservative global `maxValue` used as the value-range fallback
+when the octree is empty.
+
 ## PDB File Support
 
 The demo can load atomic coordinates from **Protein Data Bank (PDB)** files,
@@ -86,6 +129,12 @@ tsdDemoCustomField --pdb /path/to/structure.pdb
 
 Or without arguments for a random point cloud.
 
+The ImGui controls panel exposes `sigma`/`cutoff`, the transfer function, and an
+**Animation** section that perturbs the points over time (amplitude expressed in
+multiples of the median nearest-neighbor distance, so motion scales with the
+data). The octree is rebuilt each frame via a fast path that keeps blob size
+stable across the animation.
+
 ### Why weighted points for molecular data?
 
 Traditional molecular viewers render atoms as discrete spheres or stick models.
@@ -110,11 +159,14 @@ The weighted points field offers a complementary visualization:
 customField/
 ├── fields/
 │   ├── WeightedPointsFieldData.h        # GPU data struct (shared CPU/GPU)
-│   ├── WeightedPointsFieldDispatch.h    # OptiX sampler dispatch macro
+│   ├── WeightedPointsFieldDispatch.h    # Sample + value-range dispatch macros
+│   ├── samplers/
+│   │   ├── SampleWeightedPoints.cuh     # __host__ __device__ field evaluator
+│   │   └── ValueRangeWeightedPoints.cuh # Conservative per-AABB value interval
 │   ├── WeightedPointsField.h/cpp        # Host-side ANARI object
 │   └── RegisterWeightedPointsField.cpp  # Static registration
 ├── WeightedPointsOctree.h/cpp           # CPU octree builder
-├── WeightedPointsControls.h/cpp         # ImGui UI panel + PDB loader
+├── WeightedPointsControls.h/cpp         # ImGui UI panel + PDB loader + animation
 ├── tsdDemoCustomField.cpp               # Application entry point
 ├── CMakeLists.txt                       # Build configuration
 └── README.md                            # This file
diff --git a/tsd/apps/interactive/demos/customField/WeightedPointsControls.cpp b/tsd/apps/interactive/demos/customField/WeightedPointsControls.cpp
index 545344d15..4822d7a5e 100644
--- a/tsd/apps/interactive/demos/customField/WeightedPointsControls.cpp
+++ b/tsd/apps/interactive/demos/customField/WeightedPointsControls.cpp
@@ -116,8 +116,8 @@ void WeightedPointsControls::buildUI()
         "Amplitude", &m_perturbAmplitude, 0.01f, 0.f, 20.f, "%.3f");
     if (ImGui::IsItemHovered()) {
       ImGui::SetTooltip(
-          "Maximum displacement of points from their original positions "
-          "(in data units). Scaled by median nearest-neighbor distance.");
+          "Maximum point displacement, in multiples of the median "
+          "nearest-neighbor distance (1 ~= one neighbor spacing).");
     }
 
     ImGui::DragFloat(
@@ -156,6 +156,30 @@ void WeightedPointsControls::createScene()
   auto *layer = scene.defaultLayer();
   layer->clear();
 
+  // Tear down the previous scene before rebuilding (Regenerate / source change).
+  // Remove the parameter-holding objects (volume, field) before the arrays they
+  // reference, so their PARAMETER uses are released against live pool slots.
+  // Leaving them stranded lets their stale parameter handles later decrement
+  // reused array slots (the "decUseCount on zero use count" warnings).
+  if (m_volume)
+    scene.removeObject(m_volume.data());
+  if (m_field)
+    scene.removeObject(m_field.data());
+  if (m_light)
+    scene.removeObject(m_light);
+  if (m_valuesArrayRef)
+    scene.removeObject(m_valuesArrayRef.data());
+  if (m_indicesArrayRef)
+    scene.removeObject(m_indicesArrayRef.data());
+  if (m_colorArrayRef)
+    scene.removeObject(m_colorArrayRef.data());
+  m_volume = {};
+  m_field = {};
+  m_light = nullptr;
+  m_valuesArrayRef = {};
+  m_indicesArrayRef = {};
+  m_colorArrayRef = {};
+
   generatePoints();
   m_originalPoints = m_rawPoints;
   m_animationSetup = false;
@@ -217,6 +241,7 @@ void WeightedPointsControls::createScene()
   }
   colorArray->unmap();
   m_volume->setParameterObject("color", *colorArray);
+  m_colorArrayRef = colorArray; // tracked so it is released on the next rebuild
 
   layer->root()->insert_first_child({layer, m_volume});
 
@@ -241,6 +266,29 @@ void WeightedPointsControls::generatePoints()
     m_rawPoints = generateRandomUniform();
 }
 
+void WeightedPointsControls::swapFieldArrays(
+    const std::vector<float> &values, const std::vector<int32_t> &indices)
+{
+  auto &scene = appContext()->tsd.scene;
+
+  auto newValues = scene.createArray(ANARI_FLOAT32, values.size());
+  newValues->setData(values.data());
+  m_field->setParameterObject("values", *newValues);
+
+  auto newIndices = scene.createArray(ANARI_INT32, indices.size());
+  newIndices->setData(indices.data());
+  m_field->setParameterObject("indices", *newIndices);
+
+  // Release the previous arrays now that the field references the new ones —
+  // otherwise each animation frame leaks a values/indices pair into the scene.
+  if (m_valuesArrayRef)
+    scene.removeObject(m_valuesArrayRef.data());
+  if (m_indicesArrayRef)
+    scene.removeObject(m_indicesArrayRef.data());
+  m_valuesArrayRef = newValues;
+  m_indicesArrayRef = newIndices;
+}
+
 void WeightedPointsControls::rebuildField()
 {
   if (!m_field || !m_volume || m_rawPoints.empty())
@@ -279,9 +327,11 @@ void WeightedPointsControls::rebuildField()
     domainMax[c] = bmax[c] + pad;
   }
 
-  float sigma = m_sigmaOverride;
-  if (sigma <= 0.f) {
-    // Estimate typical inter-atom spacing via sampled nearest-neighbor distances
+  // Median nearest-neighbor distance: drives auto-sigma AND the animation
+  // amplitude scale (perturbPoints displaces in multiples of it), so it is
+  // computed regardless of whether sigma is overridden.
+  float medianNN = 1.f;
+  {
     size_t nSamp = std::min(n, (size_t)200);
     size_t sStep = std::max((size_t)1, n / nSamp);
     std::vector<float> nnDists;
@@ -302,11 +352,12 @@ void WeightedPointsControls::rebuildField()
         nnDists.push_back(std::sqrt(best));
     }
     std::sort(nnDists.begin(), nnDists.end());
-    float medianNN = nnDists.empty()
-        ? 1.f
-        : nnDists[nnDists.size() / 2];
-    sigma = medianNN * 3.f;
+    if (!nnDists.empty())
+      medianNN = nnDists[nnDists.size() / 2];
   }
+  m_medianNN = medianNN;
+
+  float sigma = (m_sigmaOverride > 0.f) ? m_sigmaOverride : medianNN * 3.f;
 
   float cutoff = m_cutoffOverride;
   if (cutoff <= 0.f) {
@@ -323,14 +374,9 @@ void WeightedPointsControls::rebuildField()
       "domainMax", tsd::math::float3(domainMax[0], domainMax[1], domainMax[2]));
   m_field->setParameter("sigma", sigma);
   m_field->setParameter("cutoff", cutoff);
+  m_effectiveSigma = sigma;
 
-  auto valuesArray = scene.createArray(ANARI_FLOAT32, flatValues.size());
-  valuesArray->setData(flatValues.data());
-  m_field->setParameterObject("values", *valuesArray);
-
-  auto indicesArray = scene.createArray(ANARI_INT32, flatIndices.size());
-  indicesArray->setData(flatIndices.data());
-  m_field->setParameterObject("indices", *indicesArray);
+  swapFieldArrays(flatValues, flatIndices);
 
   float inv2s2 = 1.f / (2.f * sigma * sigma);
   float maxFieldVal = 0.f;
@@ -363,7 +409,7 @@ void WeightedPointsControls::rebuildField()
 
 void WeightedPointsControls::rebuildFieldFast()
 {
-  if (!m_field || m_rawPoints.empty())
+  if (!m_field || !m_volume || m_rawPoints.empty())
     return;
 
   auto &scene = appContext()->tsd.scene;
@@ -378,20 +424,46 @@ void WeightedPointsControls::rebuildFieldFast()
 
   WeightedPointsOctree octree;
   octree.build(points, 8, 12);
-
-  const auto &flatValues = octree.flatValues();
-  const auto &flatIndices = octree.flatIndices();
-
   if (octree.numNodes() == 0)
     return;
 
-  auto valuesArray = scene.createArray(ANARI_FLOAT32, flatValues.size());
-  valuesArray->setData(flatValues.data());
-  m_field->setParameterObject("values", *valuesArray);
+  // Track the perturbed point bounds so the field domain — and thus the
+  // volume's rendered AABB — follows the animation. Without this the points
+  // drift outside the original (fixed) domain and the volume goes blank.
+  const float *bmin = octree.boundsMin();
+  const float *bmax = octree.boundsMax();
+  float dmin[3], dmax[3];
+  for (int c = 0; c < 3; c++) {
+    float pad = std::max(0.1f, (bmax[c] - bmin[c]) * 0.05f);
+    dmin[c] = bmin[c] - pad;
+    dmax[c] = bmax[c] + pad;
+  }
+  m_field->setParameter(
+      "domainMin", tsd::math::float3(dmin[0], dmin[1], dmin[2]));
+  m_field->setParameter(
+      "domainMax", tsd::math::float3(dmax[0], dmax[1], dmax[2]));
+
+  swapFieldArrays(octree.flatValues(), octree.flatIndices());
 
-  auto indicesArray = scene.createArray(ANARI_INT32, flatIndices.size());
-  indicesArray->setData(flatIndices.data());
-  m_field->setParameterObject("indices", *indicesArray);
+  // Keep the TF range matched to the current field max so opacity doesn't wash
+  // out as points spread/cluster. Sigma is held fixed (m_effectiveSigma) so the
+  // blob size stays stable across frames.
+  const float inv2s2 = 1.f / (2.f * m_effectiveSigma * m_effectiveSigma);
+  float maxFieldVal = 0.f;
+  size_t nSamples = std::min(n, (size_t)50);
+  size_t step = std::max((size_t)1, n / nSamples);
+  for (size_t i = 0; i < n; i += step) {
+    const float *p = m_rawPoints.data() + i * 4;
+    float val = 0.f;
+    for (size_t j = 0; j < n; j++) {
+      const float *q = m_rawPoints.data() + j * 4;
+      float dx = p[0] - q[0], dy = p[1] - q[1], dz = p[2] - q[2];
+      val += q[3] * std::exp(-(dx * dx + dy * dy + dz * dz) * inv2s2);
+    }
+    maxFieldVal = std::max(maxFieldVal, val);
+  }
+  tsd::math::float2 valueRange(0.f, maxFieldVal > 0.f ? maxFieldVal : 1.f);
+  m_volume->setParameter("valueRange", ANARI_FLOAT32_BOX1, &valueRange);
 
   scene.signalLayerStructureChanged(scene.defaultLayer());
 }
@@ -419,6 +491,11 @@ void WeightedPointsControls::perturbPoints(float t)
 
   m_rawPoints.resize(m_originalPoints.size());
 
+  // Amplitude is in multiples of the median nearest-neighbor distance, so the
+  // motion scales with the point cloud (a value of ~1 wiggles each point by
+  // roughly one neighbor spacing) instead of flinging them off in raw units.
+  const float amp = m_perturbAmplitude * m_medianNN;
+
   for (size_t i = 0; i < n; i++) {
     size_t base = i * 4;
     float seed = static_cast<float>(i);
@@ -427,9 +504,9 @@ void WeightedPointsControls::perturbPoints(float t)
     float dy = std::cos(phase + seed * 2.3f) * std::sin(seed * 0.7f);
     float dz = std::sin(phase + seed * 3.1f) * std::cos(seed * 1.1f);
 
-    m_rawPoints[base + 0] = m_originalPoints[base + 0] + dx * m_perturbAmplitude;
-    m_rawPoints[base + 1] = m_originalPoints[base + 1] + dy * m_perturbAmplitude;
-    m_rawPoints[base + 2] = m_originalPoints[base + 2] + dz * m_perturbAmplitude;
+    m_rawPoints[base + 0] = m_originalPoints[base + 0] + dx * amp;
+    m_rawPoints[base + 1] = m_originalPoints[base + 1] + dy * amp;
+    m_rawPoints[base + 2] = m_originalPoints[base + 2] + dz * amp;
     m_rawPoints[base + 3] = m_originalPoints[base + 3];
   }
 
diff --git a/tsd/apps/interactive/demos/customField/WeightedPointsControls.h b/tsd/apps/interactive/demos/customField/WeightedPointsControls.h
index d81f1af87..5f4cb3b33 100644
--- a/tsd/apps/interactive/demos/customField/WeightedPointsControls.h
+++ b/tsd/apps/interactive/demos/customField/WeightedPointsControls.h
@@ -7,6 +7,7 @@
 #include <tsd/ui/imgui/windows/Window.h>
 #include <string>
 #include <tsd/scene/Object.hpp>
+#include <tsd/scene/objects/Array.hpp>
 #include <vector>
 
 namespace tsd::demo {
@@ -27,6 +28,10 @@ struct WeightedPointsControls : public tsd::ui::imgui::Window
   void setupAnimation();
   void perturbPoints(float t);
 
+  // Swap the field's values/indices arrays, releasing the previous pair.
+  void swapFieldArrays(
+      const std::vector<float> &values, const std::vector<int32_t> &indices);
+
   std::vector<float> generateRandomUniform();
   std::vector<float> loadPDB(const std::string &path);
 
@@ -34,7 +39,7 @@ struct WeightedPointsControls : public tsd::ui::imgui::Window
   int m_numPoints{2000};
   float m_sigmaOverride{0.f};
   float m_cutoffOverride{0.f};
-  float m_perturbAmplitude{15.f};
+  float m_perturbAmplitude{1.f}; // in units of median nearest-neighbor distance
   float m_perturbFrequency{2.f};
 
   std::string m_pdbPath;
@@ -45,6 +50,18 @@ struct WeightedPointsControls : public tsd::ui::imgui::Window
   tsd::scene::VolumeRef m_volume;
   tsd::scene::Object *m_light{nullptr};
 
+  // Current field data arrays, tracked so the previous ones can be released
+  // each rebuild (otherwise every animation frame leaks a new pair).
+  tsd::scene::ArrayRef m_valuesArrayRef;
+  tsd::scene::ArrayRef m_indicesArrayRef;
+  tsd::scene::ArrayRef m_colorArrayRef;
+  // Gaussian width chosen by the last full rebuild; reused (not recomputed) by
+  // the per-frame fast path so blob size stays stable across the animation.
+  float m_effectiveSigma{1.f};
+  // Median nearest-neighbor distance from the last full rebuild; the animation
+  // amplitude is expressed in multiples of this so motion scales with the data.
+  float m_medianNN{1.f};
+
   std::vector<float> m_rawPoints;
   std::vector<float> m_originalPoints;
 };
diff --git a/tsd/apps/interactive/demos/customField/fields/WeightedPointsField.cpp b/tsd/apps/interactive/demos/customField/fields/WeightedPointsField.cpp
index 5a520fa9a..9c77c4f0b 100644
--- a/tsd/apps/interactive/demos/customField/fields/WeightedPointsField.cpp
+++ b/tsd/apps/interactive/demos/customField/fields/WeightedPointsField.cpp
@@ -90,12 +90,24 @@ void WeightedPointsField::finalize()
   if (cutoff <= 0.f)
     cutoff = 5.f * sigma;
 
+  // Conservative global upper bound for the dispatch fallback: any LOD cut is a
+  // subset of nodes, so summing the weights upper-bounds the field. (A single
+  // max weight is NOT valid: overlapping Gaussians sum beyond any single one.)
+  float maxValue = 0.f;
+  if (m_valuesArray) {
+    const float *hv = m_valuesArray->beginAs<float>();
+    const size_t nn = m_valuesArray->totalSize() / 4;
+    for (size_t i = 0; i < nn; i++)
+      maxValue += std::max(0.f, hv[i * 4 + 3]);
+  }
+
   m_deviceData.values = m_dValues;
   m_deviceData.indices = m_dIndices;
   m_deviceData.numNodes = m_numNodes;
   m_deviceData.sigma = sigma;
   m_deviceData.inv2SigmaSq = 1.f / (2.f * sigma * sigma);
   m_deviceData.cutoff = cutoff;
+  m_deviceData.maxValue = maxValue;
   m_deviceData.domainMin = m_domainMin;
   m_deviceData.domainMax = m_domainMax;
 
diff --git a/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldData.h b/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldData.h
index 717ceeedb..767dec536 100644
--- a/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldData.h
+++ b/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldData.h
@@ -18,6 +18,7 @@ struct WeightedPointsFieldData
   float sigma;               // Gaussian width
   float inv2SigmaSq;         // 1 / (2 * sigma^2), precomputed
   float cutoff;              // initial LOD distance
+  float maxValue;            // conservative global upper bound (fallback)
   visrtx::vec3 domainMin;
   visrtx::vec3 domainMax;
 };
diff --git a/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldDispatch.h b/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldDispatch.h
index 9e9598b94..24625eb98 100644
--- a/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldDispatch.h
+++ b/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldDispatch.h
@@ -4,6 +4,7 @@
 #pragma once
 
 #include "WeightedPointsFieldData.h"
+#include "samplers/ValueRangeWeightedPoints.cuh"
 #include "samplers/SampleWeightedPoints.cuh"
 
 #define VISRTX_CUSTOM_SAMPLE_DISPATCH(data, P) \
@@ -14,3 +15,15 @@
     default:                                       \
       return 0.0f;                                 \
   }
+
+#define VISRTX_CUSTOM_VALUE_RANGE_DISPATCH(data, boxLo, boxHi)                 \
+  switch (data.subType) {                                                      \
+    case visrtx::DEMO_WEIGHTED_POINTS_FIELD_TYPE:                              \
+      return valueRangeWeightedPoints(                                         \
+          *reinterpret_cast<const WeightedPointsFieldData*>(data.fieldData),   \
+          boxLo, boxHi);                                                       \
+    default: /* unknown subtype: global conservative interval, never vanish */ \
+      return visrtx::box1{0.f,                                                  \
+          reinterpret_cast<const WeightedPointsFieldData*>(                    \
+              data.fieldData)->maxValue};                                      \
+  }
diff --git a/tsd/apps/interactive/demos/customField/fields/samplers/ValueRangeWeightedPoints.cuh b/tsd/apps/interactive/demos/customField/fields/samplers/ValueRangeWeightedPoints.cuh
new file mode 100644
index 000000000..60e16578a
--- /dev/null
+++ b/tsd/apps/interactive/demos/customField/fields/samplers/ValueRangeWeightedPoints.cuh
@@ -0,0 +1,37 @@
+// Copyright 2025-2026 NVIDIA Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "../WeightedPointsFieldData.h"
+#include "gpu/gpu_math.h"
+
+// Conservative value interval over an object-space AABB for a non-negative
+// weighted-points field. The lower bound is always 0; the upper bound sums each
+// node's largest Gaussian contribution over the box (evaluated at the box point
+// closest to the node center). Conservative because the sampler's value at any
+// point is a sum over an LOD cut, which is a SUBSET of all nodes, and each
+// node's box maximum upper-bounds its contribution there.
+inline __host__ __device__ visrtx::box1 valueRangeWeightedPoints(
+    const WeightedPointsFieldData &field,
+    const visrtx::vec3 &boxLo,
+    const visrtx::vec3 &boxHi)
+{
+  if (!field.values || field.numNodes <= 0)
+    return visrtx::box1{0.f, field.maxValue};
+
+  const float k = field.inv2SigmaSq; // = 1/(2 sigma^2) > 0
+  float hi = 0.f;
+  for (int i = 0; i < field.numNodes; ++i) {
+    const float w = field.values[i * 4 + 3];
+    if (w <= 0.f)
+      continue;
+    const visrtx::vec3 c(field.values[i * 4 + 0],
+        field.values[i * 4 + 1],
+        field.values[i * 4 + 2]);
+    // Closest box point to the node center -> largest exp(-d^2 k) over the box.
+    const visrtx::vec3 dNear = c - glm::clamp(c, boxLo, boxHi);
+    hi += w * expf(-glm::dot(dNear, dNear) * k);
+  }
+  return visrtx::box1{0.f, hi};
+}