diff --git a/devices/rtx/device/CMakeLists.txt b/devices/rtx/device/CMakeLists.txt index 53db3fb81..4724e4d55 100644 --- a/devices/rtx/device/CMakeLists.txt +++ b/devices/rtx/device/CMakeLists.txt @@ -196,7 +196,9 @@ set(SOURCES spatial_field/StructuredRectilinearSampler.cpp spatial_field/StructuredRegularSampler.cpp spatial_field/space_skipping/UniformGrid.cu + spatial_field/space_skipping/UniformGridCustom.cu spatial_field/CustomFieldSampler.cpp + spatial_field/CustomField.cpp spatial_field/RegisterCustomFields.cpp surface/Surface.cpp diff --git a/devices/rtx/device/gpu/sbt.h b/devices/rtx/device/gpu/sbt.h index 1f0c92645..40222ab68 100644 --- a/devices/rtx/device/gpu/sbt.h +++ b/devices/rtx/device/gpu/sbt.h @@ -52,15 +52,24 @@ enum class SurfaceShaderEntryPoints enum class SpatialFieldSamplerEntryPoints { - Init = 0, - Sample, // Heavy Woodcock-loop bodies live as per-variant direct-callables in each - // sampler PTX module. Renderer wrappers in volumeIntegration.h compute - // (samplerCallableIndex + offset) and invoke via optixDirectCall. - SampleDistance, + // sampler PTX module. Present for every field family. Renderer wrappers in + // volumeIntegration.h compute (samplerCallableIndex + offset) and invoke via + // optixDirectCall. + SampleDistance = 0, RatioTrackTransmittance, RayMarchVolume, - Count + Count, // built-in families reserve this many callable slots + + // Custom-field-only hooks. Built-in families sample inline — their concrete + // state type resolves sampleValue/sampleNormal by ADL inside the Woodcock + // body — so they never dispatch these through the SBT. Custom fields route + // back through the SBT (callable-in-callable) because the user sampler is + // compiled into these callables. + Init = Count, + SampleValue, + SampleNormal, + CustomCount // custom field reserves this many callable slots }; enum class SbtCallableEntryPoints : uint32_t @@ -96,7 +105,8 @@ enum class SbtCallableEntryPoints : uint32_t // based on CustomFieldType in the field data SpatialFieldSamplerCustom = SpatialFieldSamplerNvdbRectilinearFloat + int(SpatialFieldSamplerEntryPoints::Count), - Last = SpatialFieldSamplerCustom + int(SpatialFieldSamplerEntryPoints::Count), + Last = SpatialFieldSamplerCustom + + int(SpatialFieldSamplerEntryPoints::CustomCount), }; } // namespace visrtx diff --git a/devices/rtx/device/gpu/volumeIntegrationDetail.h b/devices/rtx/device/gpu/volumeIntegrationDetail.h index b1f6f980b..4b7655d1c 100644 --- a/devices/rtx/device/gpu/volumeIntegrationDetail.h +++ b/devices/rtx/device/gpu/volumeIntegrationDetail.h @@ -32,9 +32,9 @@ #pragma once // Shared helpers + Woodcock-body templates for the per-sampler callables. -// Templates parameterised on state type and sample / sampleWithGradient -// closures; each sampler _ptx.cu passes inline lambdas so codegen stays -// monomorphic per variant. +// Templates are parameterised on the sampler state type only; they call the +// shared sampleValue / sampleNormal overloads, resolved by ADL on the concrete +// state type, so codegen stays monomorphic per variant. #include "gpu/gpu_decl.h" #include "gpu/gpu_math.h" @@ -154,17 +154,13 @@ VISRTX_DEVICE bool applyShadowRussianRoulette( return false; } -// `sampleWithGradient` is a __device__ lambda capturing nothing — -// compiler inlines through it. -template +template VISRTX_DEVICE vec3 computeWorldNormal(const State &samplerState, const SpatialFieldGPUData &field, const vec3 &localPos, - const mat3x4 &worldToObject, - SampleWithGradientFn sampleWithGradient) + const mat3x4 &worldToObject) { - vec3 localGradient(0.f); - sampleWithGradient(samplerState, field, localPos, localGradient); + const vec3 localGradient = sampleNormal(samplerState, field, localPos); constexpr float MIN_GRADIENT_LENGTH_SQ = 1e-12f; if (glm::dot(localGradient, localGradient) <= MIN_GRADIENT_LENGTH_SQ) return vec3(0.f); @@ -179,9 +175,9 @@ VISRTX_DEVICE vec3 computeWorldNormal(const State &samplerState, // --------------------------------------------------------------------------- // Woodcock-loop body templates. Each sampler family's per-variant callable -// passes its inline sample / sampleWithGradient closures (typically -// __device__ lambdas) and an already-inited state. The compiler inlines -// through the lambdas, so each variant's hot path is monomorphic. +// passes an already-inited state; the body calls the shared sampleValue / +// sampleNormal overloads for that state type, so each variant's hot path is +// monomorphic. // Distance sampling via decomposition tracking (Kutz/Thiery/Novák/Iwasaki 2017): // inside each macrocell, split σ_t = σ_c + σ_r where σ_c = per-cell min @@ -190,7 +186,7 @@ VISRTX_DEVICE vec3 computeWorldNormal(const State &samplerState, // Whichever fires first is the next event; both branches use the local σ_t at // the event point for albedo/extinction. Degenerates to pure delta tracking // when σ_c = 0 (sharp TF) and to closed-form analytic flight when σ_r = 0. -template +template VISRTX_DEVICE float woodcockSampleDistance(ScreenSample &ss, const VolumeHit &hit, State &samplerState, @@ -198,9 +194,7 @@ VISRTX_DEVICE float woodcockSampleDistance(ScreenSample &ss, vec3 &albedo, float &extinction, bool &didScatter, - vec3 *normal, - SampleFn sample, - SampleWithGradientFn sampleWithGradient) + vec3 *normal) { const auto &volume = *hit.volume; auto &svv = volume.data.tf1d; @@ -251,7 +245,7 @@ VISRTX_DEVICE float woodcockSampleDistance(ScreenSample &ss, break; const vec3 p = objRay.org + objRay.dir * t; - const float s = sample(samplerState, field, p); + const float s = sampleValue(samplerState, field, p); if (glm::isnan(s)) continue; @@ -276,7 +270,7 @@ VISRTX_DEVICE float woodcockSampleDistance(ScreenSample &ss, // bound on σ_t). if (!didScatter && t_control < trav.tExit) { const vec3 p = objRay.org + objRay.dir * t_control; - const float s = sample(samplerState, field, p); + const float s = sampleValue(samplerState, field, p); if (!glm::isnan(s)) { const vec4 co = classifySample(volume, s); const float σ_t_p = @@ -295,11 +289,8 @@ VISRTX_DEVICE float woodcockSampleDistance(ScreenSample &ss, } if (normal && didScatter) { - *normal = computeWorldNormal(samplerState, - field, - scatterPos, - hit.instance->worldToObject, - sampleWithGradient); + *normal = computeWorldNormal( + samplerState, field, scatterPos, hit.instance->worldToObject); } return scatterT; @@ -312,13 +303,12 @@ VISRTX_DEVICE float woodcockSampleDistance(ScreenSample &ss, // σ_r_maj = σ_maj - σ_c, accumulating attenuation *= (1 - σ_r / σ_r_maj) per // candidate. Tighter than plain ratio tracking when σ_c > 0; degenerates to // plain ratio tracking when σ_c = 0. -template +template VISRTX_DEVICE void woodcockRatioTrackTransmittance(ScreenSample &ss, const VolumeHit &hit, State &samplerState, const SpatialFieldGPUData &field, - vec3 &attenuation, - SampleFn sample) + vec3 &attenuation) { const auto &volume = *hit.volume; auto &svv = volume.data.tf1d; @@ -363,7 +353,7 @@ VISRTX_DEVICE void woodcockRatioTrackTransmittance(ScreenSample &ss, break; const vec3 p = objRay.org + objRay.dir * t; - const float s = sample(samplerState, field, p); + const float s = sampleValue(samplerState, field, p); if (glm::isnan(s)) continue; @@ -390,7 +380,7 @@ VISRTX_DEVICE void woodcockRatioTrackTransmittance(ScreenSample &ss, // this is deterministic emission-absorption ray marching: the macrocell // `maxOpacity` is used only as an empty-cell flag, never as a majorant for // null-collision sampling. -template +template VISRTX_DEVICE float latticeRayMarchVolume(ScreenSample &ss, const VolumeHit &hit, State &samplerState, @@ -398,9 +388,7 @@ VISRTX_DEVICE float latticeRayMarchVolume(ScreenSample &ss, vec3 *color, vec3 *normal, float &opacity, - float invSamplingRate, - SampleFn sample, - SampleWithGradientFn sampleWithGradient) + float invSamplingRate) { const auto &volume = *hit.volume; auto &svv = volume.data.tf1d; @@ -421,7 +409,9 @@ VISRTX_DEVICE float latticeRayMarchVolume(ScreenSample &ss, float depth = std::numeric_limits::max(); constexpr float MIN_OPACITY_THRESHOLD = 1e-2f; - constexpr float MAX_OPACITY_THRESHOLD = 0.99f; + // Early-out once the segment is effectively opaque. Kept moderately high so + // the residual transmittance zeroed below is genuinely negligible. + constexpr float MAX_OPACITY_THRESHOLD = 0.999f; // Single stratified jitter at the segment start. const float jitter = @@ -452,7 +442,7 @@ VISRTX_DEVICE float latticeRayMarchVolume(ScreenSample &ss, break; const vec3 p = objRay.org + objRay.dir * nextSampleT; - const float s = sample(samplerState, field, p); + const float s = sampleValue(samplerState, field, p); if (!glm::isnan(s)) { const vec4 co = classifySample(volume, s); @@ -474,15 +464,20 @@ VISRTX_DEVICE float latticeRayMarchVolume(ScreenSample &ss, trav.next(); } + // The early-out treats the segment as opaque, but front-to-back compositing + // leaves a small residual transmittance (1 - opacity). Against a low-dynamic- + // range background it's invisible; against an HDR background (bright sky, sun) + // even ~0.1% leaks visibly and makes the volume look more transparent than it + // should be. + if (opacity >= MAX_OPACITY_THRESHOLD) + opacity = 1.0f; + if (normal) { *normal = vec3(0.f); if (depth < std::numeric_limits::max()) { const vec3 p = objRay.org + objRay.dir * depth; - *normal = computeWorldNormal(samplerState, - field, - p, - hit.instance->worldToObject, - sampleWithGradient); + *normal = computeWorldNormal( + samplerState, field, p, hit.instance->worldToObject); } } diff --git a/devices/rtx/device/renderer/Renderer.cpp b/devices/rtx/device/renderer/Renderer.cpp index 23b160d44..6dd2778dd 100644 --- a/devices/rtx/device/renderer/Renderer.cpp +++ b/devices/rtx/device/renderer/Renderer.cpp @@ -591,14 +591,6 @@ void Renderer::initOptixPipeline() constexpr auto SBT_CALLABLE_SPATIAL_FIELD_REGULAR_OFFSET = int(SbtCallableEntryPoints::SpatialFieldSamplerRegular); samplerDesc.callables.moduleDC = state.fieldSamplers.structuredRegular; - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__initStructuredRegularSampler"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_REGULAR_OFFSET - + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc; - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__sampleStructuredRegular"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_REGULAR_OFFSET - + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc; samplerDesc.callables.entryFunctionNameDC = "__direct_callable__sampleDistanceStructuredRegular"; callableDescs[SBT_CALLABLE_SPATIAL_FIELD_REGULAR_OFFSET @@ -628,14 +620,6 @@ void Renderer::initOptixPipeline() int(SbtCallableEntryPoints::SpatialFieldSamplerNvdbFloat); // Fp4 - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__initNvdbSamplerFp4"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP4_OFFSET - + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc; - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__sampleNvdbFp4"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP4_OFFSET - + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc; samplerDesc.callables.entryFunctionNameDC = "__direct_callable__sampleDistanceNvdbFp4"; callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP4_OFFSET @@ -651,14 +635,6 @@ void Renderer::initOptixPipeline() + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc; // Fp8 - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__initNvdbSamplerFp8"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP8_OFFSET - + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc; - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__sampleNvdbFp8"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP8_OFFSET - + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc; samplerDesc.callables.entryFunctionNameDC = "__direct_callable__sampleDistanceNvdbFp8"; callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP8_OFFSET @@ -674,14 +650,6 @@ void Renderer::initOptixPipeline() + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc; // Fp16 - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__initNvdbSamplerFp16"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP16_OFFSET - + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc; - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__sampleNvdbFp16"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP16_OFFSET - + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc; samplerDesc.callables.entryFunctionNameDC = "__direct_callable__sampleDistanceNvdbFp16"; callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FP16_OFFSET @@ -697,14 +665,6 @@ void Renderer::initOptixPipeline() + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc; // FpN - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__initNvdbSamplerFpN"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FPN_OFFSET - + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc; - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__sampleNvdbFpN"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FPN_OFFSET - + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc; samplerDesc.callables.entryFunctionNameDC = "__direct_callable__sampleDistanceNvdbFpN"; callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FPN_OFFSET @@ -720,14 +680,6 @@ void Renderer::initOptixPipeline() + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc; // Float - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__initNvdbSamplerFloat"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FLOAT_OFFSET - + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc; - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__sampleNvdbFloat"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FLOAT_OFFSET - + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc; samplerDesc.callables.entryFunctionNameDC = "__direct_callable__sampleDistanceNvdbFloat"; callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_FLOAT_OFFSET @@ -747,14 +699,6 @@ void Renderer::initOptixPipeline() constexpr auto SBT_CALLABLE_SPATIAL_FIELD_RECTILINEAR_OFFSET = int(SbtCallableEntryPoints::SpatialFieldSamplerRectilinear); - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__initStructuredRectilinearSampler"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_RECTILINEAR_OFFSET - + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc; - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__sampleStructuredRectilinear"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_RECTILINEAR_OFFSET - + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc; samplerDesc.callables.entryFunctionNameDC = "__direct_callable__sampleDistanceStructuredRectilinear"; callableDescs[SBT_CALLABLE_SPATIAL_FIELD_RECTILINEAR_OFFSET @@ -784,14 +728,6 @@ void Renderer::initOptixPipeline() int(SbtCallableEntryPoints::SpatialFieldSamplerNvdbRectilinearFloat); // Fp4 - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__initNvdbRectilinearSamplerFp4"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP4_OFFSET - + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc; - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__sampleNvdbRectilinearFp4"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP4_OFFSET - + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc; samplerDesc.callables.entryFunctionNameDC = "__direct_callable__sampleDistanceNvdbRectilinearFp4"; callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP4_OFFSET @@ -807,14 +743,6 @@ void Renderer::initOptixPipeline() + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc; // Fp8 - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__initNvdbRectilinearSamplerFp8"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP8_OFFSET - + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc; - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__sampleNvdbRectilinearFp8"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP8_OFFSET - + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc; samplerDesc.callables.entryFunctionNameDC = "__direct_callable__sampleDistanceNvdbRectilinearFp8"; callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP8_OFFSET @@ -830,14 +758,6 @@ void Renderer::initOptixPipeline() + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc; // Fp16 - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__initNvdbRectilinearSamplerFp16"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP16_OFFSET - + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc; - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__sampleNvdbRectilinearFp16"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP16_OFFSET - + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc; samplerDesc.callables.entryFunctionNameDC = "__direct_callable__sampleDistanceNvdbRectilinearFp16"; callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FP16_OFFSET @@ -853,14 +773,6 @@ void Renderer::initOptixPipeline() + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc; // FpN - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__initNvdbRectilinearSamplerFpN"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FPN_OFFSET - + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc; - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__sampleNvdbRectilinearFpN"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FPN_OFFSET - + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc; samplerDesc.callables.entryFunctionNameDC = "__direct_callable__sampleDistanceNvdbRectilinearFpN"; callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FPN_OFFSET @@ -876,14 +788,6 @@ void Renderer::initOptixPipeline() + int(SpatialFieldSamplerEntryPoints::RayMarchVolume)] = samplerDesc; // Float - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__initNvdbRectilinearSamplerFloat"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FLOAT_OFFSET - + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc; - samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__sampleNvdbRectilinearFloat"; - callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FLOAT_OFFSET - + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc; samplerDesc.callables.entryFunctionNameDC = "__direct_callable__sampleDistanceNvdbRectilinearFloat"; callableDescs[SBT_CALLABLE_SPATIAL_FIELD_NVDB_REC_FLOAT_OFFSET @@ -910,9 +814,13 @@ void Renderer::initOptixPipeline() callableDescs[SBT_CALLABLE_CUSTOM_OFFSET + int(SpatialFieldSamplerEntryPoints::Init)] = samplerDesc; samplerDesc.callables.entryFunctionNameDC = - "__direct_callable__sampleCustom"; + "__direct_callable__sampleValueCustom"; + callableDescs[SBT_CALLABLE_CUSTOM_OFFSET + + int(SpatialFieldSamplerEntryPoints::SampleValue)] = samplerDesc; + samplerDesc.callables.entryFunctionNameDC = + "__direct_callable__sampleNormalCustom"; callableDescs[SBT_CALLABLE_CUSTOM_OFFSET - + int(SpatialFieldSamplerEntryPoints::Sample)] = samplerDesc; + + int(SpatialFieldSamplerEntryPoints::SampleNormal)] = samplerDesc; samplerDesc.callables.entryFunctionNameDC = "__direct_callable__sampleDistanceCustom"; callableDescs[SBT_CALLABLE_CUSTOM_OFFSET diff --git a/devices/rtx/device/spatial_field/CustomField.cpp b/devices/rtx/device/spatial_field/CustomField.cpp new file mode 100644 index 000000000..60958ceb0 --- /dev/null +++ b/devices/rtx/device/spatial_field/CustomField.cpp @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "CustomField.h" + +namespace visrtx { + +void CustomField::finalize() +{ + // gpuData() virtual-dispatches to the concrete custom field, which has + // already uploaded its device data and called m_uniformGrid.init(...). + m_uniformGrid.computeValueRanges(gpuData()); + + // Upload gpuData() into the device registry. Must run after the grid is + // populated so the snapshot includes the macrocell pointers. Built-in fields + // do the same at the end of their finalize(); omitting it leaves + // registry.fields[index] uninitialized → garbage samplerCallableIndex/grid + // on the GPU → illegal access when the volume is sampled. + upload(); +} + +} // namespace visrtx diff --git a/devices/rtx/device/spatial_field/CustomField.h b/devices/rtx/device/spatial_field/CustomField.h index 0724cb5ae..12d091afa 100644 --- a/devices/rtx/device/spatial_field/CustomField.h +++ b/devices/rtx/device/spatial_field/CustomField.h @@ -25,6 +25,10 @@ struct CustomField : public SpatialField { CustomField(DeviceGlobalState *d) : SpatialField(d) {} ~CustomField() override = default; + + // Builds the majorant grid from the (already-uploaded) field data. Derived + // fields must init the grid and upload GPU data before calling this. + void finalize() override; }; } // namespace visrtx diff --git a/devices/rtx/device/spatial_field/CustomFieldSampler_ptx.cu b/devices/rtx/device/spatial_field/CustomFieldSampler_ptx.cu index 8cd9a3457..5409f34cb 100644 --- a/devices/rtx/device/spatial_field/CustomFieldSampler_ptx.cu +++ b/devices/rtx/device/spatial_field/CustomFieldSampler_ptx.cu @@ -13,7 +13,7 @@ * 1. Define the field data struct and add to CustomFieldType enum * 2. Create a sampler header with sampleXxx() function * 3. Include the header below - * 4. Add a case to the switch in __direct_callable__sampleCustom + * 4. Add a case to the dispatch used by sampleCustomImpl */ #include "gpu/gpu_decl.h" @@ -50,39 +50,98 @@ VISRTX_CALLABLE void __direct_callable__initCustomSampler( samplerState->custom = field->data.custom; } -/** - * @brief Sample the custom field at a given location - * - * Dispatches to the appropriate sampling function based on subType. - * Returns a normalized field value in [0, 1]. - * - * If no custom samplers are configured (VISRTX_CUSTOM_SAMPLE_DISPATCH - * not defined), returns 0.0 as a fallback. - */ -VISRTX_CALLABLE float __direct_callable__sampleCustom( - const VolumeSamplingState *samplerState, - const vec3 *location, - vec3 *gradient) +// User value sampler. Returns 0 when no custom samplers are configured. The +// user API exposes only a scalar sampler (VISRTX_CUSTOM_SAMPLE_DISPATCH); the +// normal is derived from it by central differences below. +namespace visrtx { +VISRTX_DEVICE float sampleCustomImpl(const CustomFieldData &data, const vec3 &P) { #ifdef VISRTX_CUSTOM_SAMPLE_DISPATCH - const CustomFieldData &data = samplerState->custom; - const vec3 P = *location; - VISRTX_CUSTOM_SAMPLE_DISPATCH(data, P) #else return 0.0f; #endif } +} // namespace visrtx + +VISRTX_CALLABLE float __direct_callable__sampleValueCustom( + const VolumeSamplingState *samplerState, + const SpatialFieldGPUData *, + const vec3 *location) +{ + return sampleCustomImpl(samplerState->custom, *location); +} + +// Central-difference gradient of the user value sampler. Returns the +// unnormalized object-space gradient, matching the built-in sampleNormal +// convention (caller orients + normalizes, see computeWorldNormal). Step is a +// small fraction of the field's domain so it is scale-invariant. +VISRTX_CALLABLE vec3 __direct_callable__sampleNormalCustom( + const VolumeSamplingState *samplerState, + const SpatialFieldGPUData *field, + const vec3 *location) +{ + const CustomFieldData &d = samplerState->custom; + const vec3 p = *location; + const vec3 extent = field->roi.upper - field->roi.lower; + const float eps = fmaxf(1e-3f * glm::length(extent), 1e-6f); + const float gx = sampleCustomImpl(d, p + vec3(eps, 0.f, 0.f)) + - sampleCustomImpl(d, p - vec3(eps, 0.f, 0.f)); + const float gy = sampleCustomImpl(d, p + vec3(0.f, eps, 0.f)) + - sampleCustomImpl(d, p - vec3(0.f, eps, 0.f)); + const float gz = sampleCustomImpl(d, p + vec3(0.f, 0.f, eps)) + - sampleCustomImpl(d, p - vec3(0.f, 0.f, eps)); + return vec3(gx, gy, gz) * (1.f / (2.f * eps)); +} //============================================================================= // Woodcock-body callables for custom fields. Because the inner sample() -// implementation is user-supplied (compiled into __direct_callable__sampleCustom -// in this same module), the Woodcock body dispatches via optixDirectCall to the -// same module's Sample slot — callable-in-callable. Slower than the built-in -// families' inline path but preserves the user-facing UX (custom-field authors -// implement only init + sample). +// implementation is user-supplied (compiled into the value/normal callables in +// this same module), the Woodcock body dispatches via optixDirectCall to the +// same module's SampleValue/SampleNormal slots — callable-in-callable. Slower +// than the built-in families' inline path but preserves the user-facing UX +// (custom-field authors implement only init + sample). //============================================================================= +// Shared per-sample API (see gpu/volumeIntegrationDetail.h) for custom fields. +// Unlike the built-in inline samplers, custom sampling routes back through the +// user's init/value/normal direct-callables, so it needs `field` for the +// callable index. +namespace visrtx { + +VISRTX_DEVICE void initSamplerState( + VolumeSamplingState &s, const SpatialFieldGPUData &field) +{ + optixDirectCall(uint32_t(field.samplerCallableIndex) + + uint32_t(SpatialFieldSamplerEntryPoints::Init), + &s, + &field); +} + +VISRTX_DEVICE float sampleValue(const VolumeSamplingState &s, + const SpatialFieldGPUData &field, + const vec3 &p) +{ + return optixDirectCall(uint32_t(field.samplerCallableIndex) + + uint32_t(SpatialFieldSamplerEntryPoints::SampleValue), + &s, + &field, + &p); +} + +VISRTX_DEVICE vec3 sampleNormal(const VolumeSamplingState &s, + const SpatialFieldGPUData &field, + const vec3 &p) +{ + return optixDirectCall(uint32_t(field.samplerCallableIndex) + + uint32_t(SpatialFieldSamplerEntryPoints::SampleNormal), + &s, + &field, + &p); +} + +} // namespace visrtx + VISRTX_CALLABLE float __direct_callable__sampleDistanceCustom(ScreenSample *ss, const VolumeHit *hit, vec3 *albedo, @@ -93,9 +152,8 @@ VISRTX_CALLABLE float __direct_callable__sampleDistanceCustom(ScreenSample *ss, const auto &field = getSpatialFieldData(*ss->frameData, hit->volume->data.tf1d.field); VolumeSamplingState samplerState; - samplerState.custom = field.data.custom; + initSamplerState(samplerState, field); - const uint32_t baseIdx = uint32_t(field.samplerCallableIndex); return detail::woodcockSampleDistance(*ss, *hit, samplerState, @@ -103,26 +161,7 @@ VISRTX_CALLABLE float __direct_callable__sampleDistanceCustom(ScreenSample *ss, *albedo, *extinction, *didScatter, - normal, - [baseIdx] __device__(const VolumeSamplingState &s, - const SpatialFieldGPUData &, - const vec3 &p) { - return optixDirectCall( - baseIdx + uint32_t(SpatialFieldSamplerEntryPoints::Sample), - &s, - &p, - (vec3 *)nullptr); - }, - [baseIdx] __device__(const VolumeSamplingState &s, - const SpatialFieldGPUData &, - const vec3 &p, - vec3 &g) { - return optixDirectCall( - baseIdx + uint32_t(SpatialFieldSamplerEntryPoints::Sample), - &s, - &p, - &g); - }); + normal); } VISRTX_CALLABLE void __direct_callable__ratioTrackTransmittanceCustom( @@ -131,23 +170,10 @@ VISRTX_CALLABLE void __direct_callable__ratioTrackTransmittanceCustom( const auto &field = getSpatialFieldData(*ss->frameData, hit->volume->data.tf1d.field); VolumeSamplingState samplerState; - samplerState.custom = field.data.custom; + initSamplerState(samplerState, field); - const uint32_t baseIdx = uint32_t(field.samplerCallableIndex); - detail::woodcockRatioTrackTransmittance(*ss, - *hit, - samplerState, - field, - *attenuation, - [baseIdx] __device__(const VolumeSamplingState &s, - const SpatialFieldGPUData &, - const vec3 &p) { - return optixDirectCall( - baseIdx + uint32_t(SpatialFieldSamplerEntryPoints::Sample), - &s, - &p, - (vec3 *)nullptr); - }); + detail::woodcockRatioTrackTransmittance( + *ss, *hit, samplerState, field, *attenuation); } VISRTX_CALLABLE float __direct_callable__rayMarchVolumeCustom(ScreenSample *ss, @@ -160,34 +186,8 @@ VISRTX_CALLABLE float __direct_callable__rayMarchVolumeCustom(ScreenSample *ss, const auto &field = getSpatialFieldData(*ss->frameData, hit->volume->data.tf1d.field); VolumeSamplingState samplerState; - samplerState.custom = field.data.custom; + initSamplerState(samplerState, field); - const uint32_t baseIdx = uint32_t(field.samplerCallableIndex); - return detail::latticeRayMarchVolume(*ss, - *hit, - samplerState, - field, - color, - normal, - *opacity, - invSamplingRate, - [baseIdx] __device__(const VolumeSamplingState &s, - const SpatialFieldGPUData &, - const vec3 &p) { - return optixDirectCall( - baseIdx + uint32_t(SpatialFieldSamplerEntryPoints::Sample), - &s, - &p, - (vec3 *)nullptr); - }, - [baseIdx] __device__(const VolumeSamplingState &s, - const SpatialFieldGPUData &, - const vec3 &p, - vec3 &g) { - return optixDirectCall( - baseIdx + uint32_t(SpatialFieldSamplerEntryPoints::Sample), - &s, - &p, - &g); - }); + return detail::latticeRayMarchVolume( + *ss, *hit, samplerState, field, color, normal, *opacity, invSamplingRate); } diff --git a/devices/rtx/device/spatial_field/NvdbRectilinearSamplerInline.h b/devices/rtx/device/spatial_field/NvdbRectilinearSamplerInline.h index e267b4a01..356104eb7 100644 --- a/devices/rtx/device/spatial_field/NvdbRectilinearSamplerInline.h +++ b/devices/rtx/device/spatial_field/NvdbRectilinearSamplerInline.h @@ -120,37 +120,43 @@ VISRTX_DEVICE float sampleAtIndexRectilinear( return state.linearSampler(clamped); } +// Shared per-sample API (see gpu/volumeIntegrationDetail.h). sampleValue +// returns the field value; sampleNormal returns the unnormalized object-space +// gradient (the raw normal direction) — callers orient and normalize. `field` +// is unused for built-in fields (present for a uniform overload set). template -VISRTX_DEVICE float sampleNvdbRectilinear( +VISRTX_DEVICE float sampleValue( const NvdbRectilinearSamplerState &state, - const vec3 *location, - vec3 *gradient) + const SpatialFieldGPUData &, + const vec3 &p) { - const auto indexPos = worldToIndexRectilinear(state, location); - const float value = sampleAtIndexRectilinear(state, indexPos); - - if (gradient) { - const float sxp = - sampleAtIndexRectilinear(state, indexPos + nanovdb::Vec3f(1, 0, 0)); - const float sxn = - sampleAtIndexRectilinear(state, indexPos - nanovdb::Vec3f(1, 0, 0)); - const float syp = - sampleAtIndexRectilinear(state, indexPos + nanovdb::Vec3f(0, 1, 0)); - const float syn = - sampleAtIndexRectilinear(state, indexPos - nanovdb::Vec3f(0, 1, 0)); - const float szp = - sampleAtIndexRectilinear(state, indexPos + nanovdb::Vec3f(0, 0, 1)); - const float szn = - sampleAtIndexRectilinear(state, indexPos - nanovdb::Vec3f(0, 0, 1)); - - *gradient = vec3(sxp - sxn, syp - syn, szp - szn) - * vec3(state.invAvgVoxelSize[0], - state.invAvgVoxelSize[1], - state.invAvgVoxelSize[2]) - * 0.5f; - } + return sampleAtIndexRectilinear(state, worldToIndexRectilinear(state, &p)); +} - return value; +template +VISRTX_DEVICE vec3 sampleNormal( + const NvdbRectilinearSamplerState &state, + const SpatialFieldGPUData &, + const vec3 &p) +{ + const auto indexPos = worldToIndexRectilinear(state, &p); + const float sxp = + sampleAtIndexRectilinear(state, indexPos + nanovdb::Vec3f(1, 0, 0)); + const float sxn = + sampleAtIndexRectilinear(state, indexPos - nanovdb::Vec3f(1, 0, 0)); + const float syp = + sampleAtIndexRectilinear(state, indexPos + nanovdb::Vec3f(0, 1, 0)); + const float syn = + sampleAtIndexRectilinear(state, indexPos - nanovdb::Vec3f(0, 1, 0)); + const float szp = + sampleAtIndexRectilinear(state, indexPos + nanovdb::Vec3f(0, 0, 1)); + const float szn = + sampleAtIndexRectilinear(state, indexPos - nanovdb::Vec3f(0, 0, 1)); + return vec3(sxp - sxn, syp - syn, szp - szn) + * vec3(state.invAvgVoxelSize[0], + state.invAvgVoxelSize[1], + state.invAvgVoxelSize[2]) + * 0.5f; } } // namespace visrtx diff --git a/devices/rtx/device/spatial_field/NvdbRectilinearSampler_ptx.cu b/devices/rtx/device/spatial_field/NvdbRectilinearSampler_ptx.cu index fc3f52fe6..b29b92059 100644 --- a/devices/rtx/device/spatial_field/NvdbRectilinearSampler_ptx.cu +++ b/devices/rtx/device/spatial_field/NvdbRectilinearSampler_ptx.cu @@ -30,93 +30,15 @@ */ // OptiX direct-callable entry points for the NanoVDB rectilinear-grid sampler. -// Implementations live in NvdbRectilinearSamplerInline.h. +// Only the Woodcock-body callables are exposed via the SBT; value/normal/init +// sampling stays inline (NvdbRectilinearSamplerInline.h) since the bodies below +// resolve sampleValue/sampleNormal by ADL on the concrete state type. #include "NvdbRectilinearSamplerInline.h" #include "gpu/volumeIntegrationDetail.h" using namespace visrtx; -// Fp4 rectilinear sampler -VISRTX_CALLABLE void __direct_callable__initNvdbRectilinearSamplerFp4( - VolumeSamplingState *samplerState, const SpatialFieldGPUData *field) -{ - initNvdbRectilinearSampler(samplerState->nvdbRectilinearFp4, field); -} - -VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFp4( - const VolumeSamplingState *samplerState, - const vec3 *location, - vec3 *gradient) -{ - return sampleNvdbRectilinear( - samplerState->nvdbRectilinearFp4, location, gradient); -} - -// Fp8 rectilinear sampler -VISRTX_CALLABLE void __direct_callable__initNvdbRectilinearSamplerFp8( - VolumeSamplingState *samplerState, const SpatialFieldGPUData *field) -{ - initNvdbRectilinearSampler(samplerState->nvdbRectilinearFp8, field); -} - -VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFp8( - const VolumeSamplingState *samplerState, - const vec3 *location, - vec3 *gradient) -{ - return sampleNvdbRectilinear( - samplerState->nvdbRectilinearFp8, location, gradient); -} - -// Fp16 rectilinear sampler -VISRTX_CALLABLE void __direct_callable__initNvdbRectilinearSamplerFp16( - VolumeSamplingState *samplerState, const SpatialFieldGPUData *field) -{ - initNvdbRectilinearSampler(samplerState->nvdbRectilinearFp16, field); -} - -VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFp16( - const VolumeSamplingState *samplerState, - const vec3 *location, - vec3 *gradient) -{ - return sampleNvdbRectilinear( - samplerState->nvdbRectilinearFp16, location, gradient); -} - -// FpN rectilinear sampler -VISRTX_CALLABLE void __direct_callable__initNvdbRectilinearSamplerFpN( - VolumeSamplingState *samplerState, const SpatialFieldGPUData *field) -{ - initNvdbRectilinearSampler(samplerState->nvdbRectilinearFpN, field); -} - -VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFpN( - const VolumeSamplingState *samplerState, - const vec3 *location, - vec3 *gradient) -{ - return sampleNvdbRectilinear( - samplerState->nvdbRectilinearFpN, location, gradient); -} - -// Float rectilinear sampler -VISRTX_CALLABLE void __direct_callable__initNvdbRectilinearSamplerFloat( - VolumeSamplingState *samplerState, const SpatialFieldGPUData *field) -{ - initNvdbRectilinearSampler(samplerState->nvdbRectilinearFloat, field); -} - -VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFloat( - const VolumeSamplingState *samplerState, - const vec3 *location, - vec3 *gradient) -{ - return sampleNvdbRectilinear( - samplerState->nvdbRectilinearFloat, location, gradient); -} - // Woodcock-body callables — see NvdbRegularSampler_ptx.cu for the design rationale. #define VISRTX_DEFINE_NVDB_RECT_WOODCOCK_CALLABLES(Suffix, ValueType) \ VISRTX_CALLABLE float __direct_callable__sampleDistance##Suffix( \ @@ -139,14 +61,7 @@ VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFloat( *albedo, \ *extinction, \ *didScatter, \ - normal, \ - [] __device__(const NvdbRectilinearSamplerState &s, \ - const SpatialFieldGPUData &, \ - const vec3 &p) { return sampleNvdbRectilinear(s, &p, nullptr); }, \ - [] __device__(const NvdbRectilinearSamplerState &s, \ - const SpatialFieldGPUData &, \ - const vec3 &p, \ - vec3 &g) { return sampleNvdbRectilinear(s, &p, &g); }); \ + normal); \ } \ \ VISRTX_CALLABLE void __direct_callable__ratioTrackTransmittance##Suffix( \ @@ -157,14 +72,8 @@ VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFloat( SamplerStateBox> stateBox; \ auto &samplerState = stateBox.state; \ initNvdbRectilinearSampler(samplerState, &field); \ - detail::woodcockRatioTrackTransmittance(*ss, \ - *hit, \ - samplerState, \ - field, \ - *attenuation, \ - [] __device__(const NvdbRectilinearSamplerState &s, \ - const SpatialFieldGPUData &, \ - const vec3 &p) { return sampleNvdbRectilinear(s, &p, nullptr); });\ + detail::woodcockRatioTrackTransmittance( \ + *ss, *hit, samplerState, field, *attenuation); \ } \ \ VISRTX_CALLABLE float __direct_callable__rayMarchVolume##Suffix( \ @@ -187,14 +96,7 @@ VISRTX_CALLABLE float __direct_callable__sampleNvdbRectilinearFloat( color, \ normal, \ *opacity, \ - invSamplingRate, \ - [] __device__(const NvdbRectilinearSamplerState &s, \ - const SpatialFieldGPUData &, \ - const vec3 &p) { return sampleNvdbRectilinear(s, &p, nullptr); }, \ - [] __device__(const NvdbRectilinearSamplerState &s, \ - const SpatialFieldGPUData &, \ - const vec3 &p, \ - vec3 &g) { return sampleNvdbRectilinear(s, &p, &g); }); \ + invSamplingRate); \ } VISRTX_DEFINE_NVDB_RECT_WOODCOCK_CALLABLES(NvdbRectilinearFp4, nanovdb::Fp4) diff --git a/devices/rtx/device/spatial_field/NvdbRegularSamplerInline.h b/devices/rtx/device/spatial_field/NvdbRegularSamplerInline.h index de967b1dc..649aba3ec 100644 --- a/devices/rtx/device/spatial_field/NvdbRegularSamplerInline.h +++ b/devices/rtx/device/spatial_field/NvdbRegularSamplerInline.h @@ -32,10 +32,10 @@ #pragma once // NanoVDB regular-grid sampler — inline device implementations. -// Lives in a header so the volume integrator can call sampleNvdb directly, -// bypassing the OptiX direct-callable dispatch on hot paths. The matching -// __direct_callable__ entry points stay in NvdbRegularSampler_ptx.cu for -// renderers that go through the SBT slot. +// Lives in a header so the volume integrator and isosurface can call +// sampleValue/sampleNormal directly, bypassing the OptiX direct-callable +// dispatch on hot paths. The matching __direct_callable__ entry points stay in +// NvdbRegularSampler_ptx.cu for renderers that go through the SBT slot. #include "gpu/gpu_decl.h" #include "gpu/gpu_objects.h" @@ -106,64 +106,55 @@ VISRTX_DEVICE void initNvdbSampler( state.invTwoVoxelSize = nanovdb::Vec3f(iv.x, iv.y, iv.z); } +// Object-space position -> index-space sample coordinate. template -VISRTX_DEVICE float sampleNvdb( - const NvdbRegularSamplerState &state, - const vec3 *location, - vec3 *gradient) +VISRTX_DEVICE nanovdb::Vec3f nvdbIndexPos( + const NvdbRegularSamplerState &state, const vec3 &p) { - const auto indexPos0 = state.grid->worldToIndexF( - nanovdb::Vec3f(location->x, location->y, location->z)); - - const auto indexPos = - (indexPos0 - state.offsetDown) * state.scale + state.offsetUp; + const auto indexPos0 = + state.grid->worldToIndexF(nanovdb::Vec3f(p.x, p.y, p.z)); + return (indexPos0 - state.offsetDown) * state.scale + state.offsetUp; +} - const auto clamped = clampNvdb(indexPos, state.indexMin, state.indexMax); +// Filtered fetch at an index-space coordinate (clamped to the grid). +template +VISRTX_DEVICE float nvdbSampleAtIndex( + const NvdbRegularSamplerState &state, const nanovdb::Vec3f &idx) +{ + const auto c = clampNvdb(idx, state.indexMin, state.indexMax); + return state.filter == SpatialFieldFilter::Nearest ? state.nearestSampler(c) + : state.linearSampler(c); +} - if (state.filter == SpatialFieldFilter::Nearest) { - const float value = state.nearestSampler(clamped); - if (gradient) { - // Central differences at ±1 voxel in index space - const float sxp = state.nearestSampler(clampNvdb( - indexPos + nanovdb::Vec3f(1, 0, 0), state.indexMin, state.indexMax)); - const float sxn = state.nearestSampler(clampNvdb( - indexPos - nanovdb::Vec3f(1, 0, 0), state.indexMin, state.indexMax)); - const float syp = state.nearestSampler(clampNvdb( - indexPos + nanovdb::Vec3f(0, 1, 0), state.indexMin, state.indexMax)); - const float syn = state.nearestSampler(clampNvdb( - indexPos - nanovdb::Vec3f(0, 1, 0), state.indexMin, state.indexMax)); - const float szp = state.nearestSampler(clampNvdb( - indexPos + nanovdb::Vec3f(0, 0, 1), state.indexMin, state.indexMax)); - const float szn = state.nearestSampler(clampNvdb( - indexPos - nanovdb::Vec3f(0, 0, 1), state.indexMin, state.indexMax)); - // Convert from index space to object space - *gradient = - vec3((sxp - sxn) * state.scale[0] * state.invTwoVoxelSize[0], - (syp - syn) * state.scale[1] * state.invTwoVoxelSize[1], - (szp - szn) * state.scale[2] * state.invTwoVoxelSize[2]); - } - return value; - } +// Shared per-sample API (see gpu/volumeIntegrationDetail.h). sampleValue +// returns the field value; sampleNormal returns the unnormalized object-space +// gradient (the raw normal direction) — callers orient and normalize. `field` +// is unused for built-in fields (present for a uniform overload set). +template +VISRTX_DEVICE float sampleValue(const NvdbRegularSamplerState &state, + const SpatialFieldGPUData &, + const vec3 &p) +{ + return nvdbSampleAtIndex(state, nvdbIndexPos(state, p)); +} - const float value = state.linearSampler(clamped); - if (gradient) { - const float sxp = state.linearSampler(clampNvdb( - indexPos + nanovdb::Vec3f(1, 0, 0), state.indexMin, state.indexMax)); - const float sxn = state.linearSampler(clampNvdb( - indexPos - nanovdb::Vec3f(1, 0, 0), state.indexMin, state.indexMax)); - const float syp = state.linearSampler(clampNvdb( - indexPos + nanovdb::Vec3f(0, 1, 0), state.indexMin, state.indexMax)); - const float syn = state.linearSampler(clampNvdb( - indexPos - nanovdb::Vec3f(0, 1, 0), state.indexMin, state.indexMax)); - const float szp = state.linearSampler(clampNvdb( - indexPos + nanovdb::Vec3f(0, 0, 1), state.indexMin, state.indexMax)); - const float szn = state.linearSampler(clampNvdb( - indexPos - nanovdb::Vec3f(0, 0, 1), state.indexMin, state.indexMax)); - *gradient = vec3((sxp - sxn) * state.scale[0] * state.invTwoVoxelSize[0], - (syp - syn) * state.scale[1] * state.invTwoVoxelSize[1], - (szp - szn) * state.scale[2] * state.invTwoVoxelSize[2]); - } - return value; +template +VISRTX_DEVICE vec3 sampleNormal( + const NvdbRegularSamplerState &state, + const SpatialFieldGPUData &, + const vec3 &p) +{ + // Central differences at ±1 voxel in index space, mapped to object space. + const auto indexPos = nvdbIndexPos(state, p); + const float sxp = nvdbSampleAtIndex(state, indexPos + nanovdb::Vec3f(1, 0, 0)); + const float sxn = nvdbSampleAtIndex(state, indexPos - nanovdb::Vec3f(1, 0, 0)); + const float syp = nvdbSampleAtIndex(state, indexPos + nanovdb::Vec3f(0, 1, 0)); + const float syn = nvdbSampleAtIndex(state, indexPos - nanovdb::Vec3f(0, 1, 0)); + const float szp = nvdbSampleAtIndex(state, indexPos + nanovdb::Vec3f(0, 0, 1)); + const float szn = nvdbSampleAtIndex(state, indexPos - nanovdb::Vec3f(0, 0, 1)); + return vec3((sxp - sxn) * state.scale[0] * state.invTwoVoxelSize[0], + (syp - syn) * state.scale[1] * state.invTwoVoxelSize[1], + (szp - szn) * state.scale[2] * state.invTwoVoxelSize[2]); } } // namespace visrtx diff --git a/devices/rtx/device/spatial_field/NvdbRegularSampler_ptx.cu b/devices/rtx/device/spatial_field/NvdbRegularSampler_ptx.cu index ce0ee5037..d5738f79b 100644 --- a/devices/rtx/device/spatial_field/NvdbRegularSampler_ptx.cu +++ b/devices/rtx/device/spatial_field/NvdbRegularSampler_ptx.cu @@ -30,89 +30,21 @@ */ // OptiX direct-callable entry points for the NanoVDB regular-grid sampler. -// The actual implementations live in NvdbRegularSamplerInline.h so the volume -// integrator can call them inline on hot paths; this file only registers them -// against the SBT slots for the fallback dispatch path. +// Only the Woodcock-body callables are exposed via the SBT; value/normal/init +// sampling stays inline (NvdbRegularSamplerInline.h) since the bodies below +// resolve sampleValue/sampleNormal by ADL on the concrete state type, and the +// volume integrator calls the inline sampleValue/sampleNormal directly on hot +// paths. #include "NvdbRegularSamplerInline.h" #include "gpu/volumeIntegrationDetail.h" using namespace visrtx; -// Fp4 sampler -VISRTX_CALLABLE void __direct_callable__initNvdbSamplerFp4( - VolumeSamplingState *samplerState, const SpatialFieldGPUData *field) -{ - initNvdbSampler(samplerState->nvdbFp4, field); -} - -VISRTX_CALLABLE float __direct_callable__sampleNvdbFp4( - const VolumeSamplingState *samplerState, const vec3 *location, - vec3 *gradient) -{ - return sampleNvdb(samplerState->nvdbFp4, location, gradient); -} - -// Fp8 sampler -VISRTX_CALLABLE void __direct_callable__initNvdbSamplerFp8( - VolumeSamplingState *samplerState, const SpatialFieldGPUData *field) -{ - initNvdbSampler(samplerState->nvdbFp8, field); -} - -VISRTX_CALLABLE float __direct_callable__sampleNvdbFp8( - const VolumeSamplingState *samplerState, const vec3 *location, - vec3 *gradient) -{ - return sampleNvdb(samplerState->nvdbFp8, location, gradient); -} - -// Fp16 sampler -VISRTX_CALLABLE void __direct_callable__initNvdbSamplerFp16( - VolumeSamplingState *samplerState, const SpatialFieldGPUData *field) -{ - initNvdbSampler(samplerState->nvdbFp16, field); -} - -VISRTX_CALLABLE float __direct_callable__sampleNvdbFp16( - const VolumeSamplingState *samplerState, const vec3 *location, - vec3 *gradient) -{ - return sampleNvdb(samplerState->nvdbFp16, location, gradient); -} - -// FpN sampler -VISRTX_CALLABLE void __direct_callable__initNvdbSamplerFpN( - VolumeSamplingState *samplerState, const SpatialFieldGPUData *field) -{ - initNvdbSampler(samplerState->nvdbFpN, field); -} - -VISRTX_CALLABLE float __direct_callable__sampleNvdbFpN( - const VolumeSamplingState *samplerState, const vec3 *location, - vec3 *gradient) -{ - return sampleNvdb(samplerState->nvdbFpN, location, gradient); -} - -// Float sampler -VISRTX_CALLABLE void __direct_callable__initNvdbSamplerFloat( - VolumeSamplingState *samplerState, const SpatialFieldGPUData *field) -{ - initNvdbSampler(samplerState->nvdbFloat, field); -} - -VISRTX_CALLABLE float __direct_callable__sampleNvdbFloat( - const VolumeSamplingState *samplerState, const vec3 *location, - vec3 *gradient) -{ - return sampleNvdb(samplerState->nvdbFloat, location, gradient); -} - // Woodcock-body callables — one per variant. Each stack-allocates the typed // sampler state, inits it once, then runs the shared body from -// volumeIntegrationDetail.h with __device__ lambdas resolving to the -// variant's inline sampleNvdb. +// volumeIntegrationDetail.h, which calls the variant's inline +// sampleValue/sampleNormal. #define VISRTX_DEFINE_NVDB_WOODCOCK_CALLABLES(Suffix, ValueType) \ VISRTX_CALLABLE float __direct_callable__sampleDistance##Suffix( \ ScreenSample *ss, \ @@ -134,14 +66,7 @@ VISRTX_CALLABLE float __direct_callable__sampleNvdbFloat( *albedo, \ *extinction, \ *didScatter, \ - normal, \ - [] __device__(const NvdbRegularSamplerState &s, \ - const SpatialFieldGPUData &, \ - const vec3 &p) { return sampleNvdb(s, &p, nullptr); }, \ - [] __device__(const NvdbRegularSamplerState &s, \ - const SpatialFieldGPUData &, \ - const vec3 &p, \ - vec3 &g) { return sampleNvdb(s, &p, &g); }); \ + normal); \ } \ \ VISRTX_CALLABLE void __direct_callable__ratioTrackTransmittance##Suffix( \ @@ -152,14 +77,8 @@ VISRTX_CALLABLE float __direct_callable__sampleNvdbFloat( SamplerStateBox> stateBox; \ auto &samplerState = stateBox.state; \ initNvdbSampler(samplerState, &field); \ - detail::woodcockRatioTrackTransmittance(*ss, \ - *hit, \ - samplerState, \ - field, \ - *attenuation, \ - [] __device__(const NvdbRegularSamplerState &s, \ - const SpatialFieldGPUData &, \ - const vec3 &p) { return sampleNvdb(s, &p, nullptr); }); \ + detail::woodcockRatioTrackTransmittance( \ + *ss, *hit, samplerState, field, *attenuation); \ } \ \ VISRTX_CALLABLE float __direct_callable__rayMarchVolume##Suffix( \ @@ -182,14 +101,7 @@ VISRTX_CALLABLE float __direct_callable__sampleNvdbFloat( color, \ normal, \ *opacity, \ - invSamplingRate, \ - [] __device__(const NvdbRegularSamplerState &s, \ - const SpatialFieldGPUData &, \ - const vec3 &p) { return sampleNvdb(s, &p, nullptr); }, \ - [] __device__(const NvdbRegularSamplerState &s, \ - const SpatialFieldGPUData &, \ - const vec3 &p, \ - vec3 &g) { return sampleNvdb(s, &p, &g); }); \ + invSamplingRate); \ } VISRTX_DEFINE_NVDB_WOODCOCK_CALLABLES(NvdbFp4, nanovdb::Fp4) diff --git a/devices/rtx/device/spatial_field/StructuredRectilinearSamplerInline.h b/devices/rtx/device/spatial_field/StructuredRectilinearSamplerInline.h index 6c65dd743..8b478e229 100644 --- a/devices/rtx/device/spatial_field/StructuredRectilinearSamplerInline.h +++ b/devices/rtx/device/spatial_field/StructuredRectilinearSamplerInline.h @@ -59,41 +59,52 @@ VISRTX_DEVICE void initStructuredRectilinearSampler( data.cellCentered ? (state.dims + vec3(1)) / extent : state.dims / extent; } -VISRTX_DEVICE float sampleStructuredRectilinear( - const StructuredRectilinearSamplerState &state, - const vec3 *location, - vec3 *gradient) +// Maps an object-space position to texture sample coordinates through the +// per-axis warp LUTs. +VISRTX_DEVICE vec3 structuredRectilinearCoord( + const StructuredRectilinearSamplerState &state, const vec3 &p) { - vec3 normalizedPos = (*location - state.axisBoundsMin) - / (state.axisBoundsMax - state.axisBoundsMin); + vec3 normalizedPos = + (p - state.axisBoundsMin) / (state.axisBoundsMax - state.axisBoundsMin); normalizedPos = vec3(tex1D(state.axisLUT[0], normalizedPos.x), tex1D(state.axisLUT[1], normalizedPos.y), tex1D(state.axisLUT[2], normalizedPos.z)); - const auto sampleCoord = normalizedPos * state.dims + state.offset; - - const float value = - tex3D(state.texObj, sampleCoord.x, sampleCoord.y, sampleCoord.z); + return normalizedPos * state.dims + state.offset; +} - if (gradient) { - const auto px = sampleCoord + vec3(1, 0, 0); - const auto nx = sampleCoord - vec3(1, 0, 0); - const auto py = sampleCoord + vec3(0, 1, 0); - const auto ny = sampleCoord - vec3(0, 1, 0); - const auto pz = sampleCoord + vec3(0, 0, 1); - const auto nz = sampleCoord - vec3(0, 0, 1); +// Shared per-sample API (see gpu/volumeIntegrationDetail.h). sampleValue +// returns the field value; sampleNormal returns the unnormalized object-space +// gradient (the raw normal direction) — callers orient and normalize. `field` +// is unused for built-in fields (present for a uniform overload set). +VISRTX_DEVICE float sampleValue(const StructuredRectilinearSamplerState &state, + const SpatialFieldGPUData &, + const vec3 &p) +{ + const auto c = structuredRectilinearCoord(state, p); + return tex3D(state.texObj, c.x, c.y, c.z); +} - const float sxp = tex3D(state.texObj, px.x, px.y, px.z); - const float sxn = tex3D(state.texObj, nx.x, nx.y, nx.z); - const float syp = tex3D(state.texObj, py.x, py.y, py.z); - const float syn = tex3D(state.texObj, ny.x, ny.y, ny.z); - const float szp = tex3D(state.texObj, pz.x, pz.y, pz.z); - const float szn = tex3D(state.texObj, nz.x, nz.y, nz.z); +VISRTX_DEVICE vec3 sampleNormal(const StructuredRectilinearSamplerState &state, + const SpatialFieldGPUData &, + const vec3 &p) +{ + const auto sampleCoord = structuredRectilinearCoord(state, p); + const auto px = sampleCoord + vec3(1, 0, 0); + const auto nx = sampleCoord - vec3(1, 0, 0); + const auto py = sampleCoord + vec3(0, 1, 0); + const auto ny = sampleCoord - vec3(0, 1, 0); + const auto pz = sampleCoord + vec3(0, 0, 1); + const auto nz = sampleCoord - vec3(0, 0, 1); - *gradient = - vec3(sxp - sxn, syp - syn, szp - szn) * state.invAvgVoxelSpacing * 0.5f; - } + const float sxp = tex3D(state.texObj, px.x, px.y, px.z); + const float sxn = tex3D(state.texObj, nx.x, nx.y, nx.z); + const float syp = tex3D(state.texObj, py.x, py.y, py.z); + const float syn = tex3D(state.texObj, ny.x, ny.y, ny.z); + const float szp = tex3D(state.texObj, pz.x, pz.y, pz.z); + const float szn = tex3D(state.texObj, nz.x, nz.y, nz.z); - return value; + return vec3(sxp - sxn, syp - syn, szp - szn) * state.invAvgVoxelSpacing + * 0.5f; } } // namespace visrtx diff --git a/devices/rtx/device/spatial_field/StructuredRectilinearSampler_ptx.cu b/devices/rtx/device/spatial_field/StructuredRectilinearSampler_ptx.cu index ef280154d..4e011350b 100644 --- a/devices/rtx/device/spatial_field/StructuredRectilinearSampler_ptx.cu +++ b/devices/rtx/device/spatial_field/StructuredRectilinearSampler_ptx.cu @@ -30,29 +30,15 @@ */ // OptiX direct-callable entry points for the structured rectilinear sampler. -// Implementations live in StructuredRectilinearSamplerInline.h. +// Only the Woodcock-body callables are exposed via the SBT; value/normal/init +// sampling stays inline (StructuredRectilinearSamplerInline.h) since the bodies +// below resolve sampleValue/sampleNormal by ADL on the concrete state type. #include "StructuredRectilinearSamplerInline.h" #include "gpu/volumeIntegrationDetail.h" using namespace visrtx; -VISRTX_CALLABLE void __direct_callable__initStructuredRectilinearSampler( - VolumeSamplingState *samplerState, const SpatialFieldGPUData *field) -{ - initStructuredRectilinearSampler( - samplerState->structuredRectilinear, field); -} - -VISRTX_CALLABLE float __direct_callable__sampleStructuredRectilinear( - const VolumeSamplingState *samplerState, - const vec3 *location, - vec3 *gradient) -{ - return sampleStructuredRectilinear( - samplerState->structuredRectilinear, location, gradient); -} - VISRTX_CALLABLE float __direct_callable__sampleDistanceStructuredRectilinear(ScreenSample *ss, const VolumeHit *hit, @@ -73,16 +59,7 @@ __direct_callable__sampleDistanceStructuredRectilinear(ScreenSample *ss, *albedo, *extinction, *didScatter, - normal, - [] __device__(const StructuredRectilinearSamplerState &s, - const SpatialFieldGPUData &, - const vec3 &p) { - return sampleStructuredRectilinear(s, &p, nullptr); - }, - [] __device__(const StructuredRectilinearSamplerState &s, - const SpatialFieldGPUData &, - const vec3 &p, - vec3 &g) { return sampleStructuredRectilinear(s, &p, &g); }); + normal); } VISRTX_CALLABLE void @@ -94,16 +71,8 @@ __direct_callable__ratioTrackTransmittanceStructuredRectilinear( SamplerStateBox stateBox; auto &samplerState = stateBox.state; initStructuredRectilinearSampler(samplerState, &field); - detail::woodcockRatioTrackTransmittance(*ss, - *hit, - samplerState, - field, - *attenuation, - [] __device__(const StructuredRectilinearSamplerState &s, - const SpatialFieldGPUData &, - const vec3 &p) { - return sampleStructuredRectilinear(s, &p, nullptr); - }); + detail::woodcockRatioTrackTransmittance( + *ss, *hit, samplerState, field, *attenuation); } VISRTX_CALLABLE float __direct_callable__rayMarchVolumeStructuredRectilinear( @@ -119,21 +88,6 @@ VISRTX_CALLABLE float __direct_callable__rayMarchVolumeStructuredRectilinear( SamplerStateBox stateBox; auto &samplerState = stateBox.state; initStructuredRectilinearSampler(samplerState, &field); - return detail::latticeRayMarchVolume(*ss, - *hit, - samplerState, - field, - color, - normal, - *opacity, - invSamplingRate, - [] __device__(const StructuredRectilinearSamplerState &s, - const SpatialFieldGPUData &, - const vec3 &p) { - return sampleStructuredRectilinear(s, &p, nullptr); - }, - [] __device__(const StructuredRectilinearSamplerState &s, - const SpatialFieldGPUData &, - const vec3 &p, - vec3 &g) { return sampleStructuredRectilinear(s, &p, &g); }); + return detail::latticeRayMarchVolume( + *ss, *hit, samplerState, field, color, normal, *opacity, invSamplingRate); } diff --git a/devices/rtx/device/spatial_field/StructuredRegularSamplerInline.h b/devices/rtx/device/spatial_field/StructuredRegularSamplerInline.h index b59e76d8c..f629fac89 100644 --- a/devices/rtx/device/spatial_field/StructuredRegularSamplerInline.h +++ b/devices/rtx/device/spatial_field/StructuredRegularSamplerInline.h @@ -50,34 +50,38 @@ VISRTX_DEVICE void initStructuredRegularSampler( vec3(field->data.structuredRegular.cellCentered ? 0.0f : 0.5f); } -VISRTX_DEVICE float sampleStructuredRegular( - const StructuredRegularSamplerState &state, - const vec3 *location, - vec3 *gradient) +// Shared per-sample API (see gpu/volumeIntegrationDetail.h). sampleValue +// returns the field value; sampleNormal returns the unnormalized object-space +// gradient (the raw normal direction) — callers orient and normalize. `field` +// is unused for built-in fields (present for a uniform overload set). +VISRTX_DEVICE float sampleValue(const StructuredRegularSamplerState &state, + const SpatialFieldGPUData &, + const vec3 &p) { - const auto texelCoords = (*location - state.origin) * state.invSpacing; - const auto coords = texelCoords + state.offset; - const float value = tex3D(state.texObj, coords.x, coords.y, coords.z); - - if (gradient) { - const auto px = coords + vec3(1, 0, 0); - const auto nx = coords - vec3(1, 0, 0); - const auto py = coords + vec3(0, 1, 0); - const auto ny = coords - vec3(0, 1, 0); - const auto pz = coords + vec3(0, 0, 1); - const auto nz = coords - vec3(0, 0, 1); + const auto coords = (p - state.origin) * state.invSpacing + state.offset; + return tex3D(state.texObj, coords.x, coords.y, coords.z); +} - const float sxp = tex3D(state.texObj, px.x, px.y, px.z); - const float sxn = tex3D(state.texObj, nx.x, nx.y, nx.z); - const float syp = tex3D(state.texObj, py.x, py.y, py.z); - const float syn = tex3D(state.texObj, ny.x, ny.y, ny.z); - const float szp = tex3D(state.texObj, pz.x, pz.y, pz.z); - const float szn = tex3D(state.texObj, nz.x, nz.y, nz.z); +VISRTX_DEVICE vec3 sampleNormal(const StructuredRegularSamplerState &state, + const SpatialFieldGPUData &, + const vec3 &p) +{ + const auto coords = (p - state.origin) * state.invSpacing + state.offset; + const auto px = coords + vec3(1, 0, 0); + const auto nx = coords - vec3(1, 0, 0); + const auto py = coords + vec3(0, 1, 0); + const auto ny = coords - vec3(0, 1, 0); + const auto pz = coords + vec3(0, 0, 1); + const auto nz = coords - vec3(0, 0, 1); - *gradient = vec3(sxp - sxn, syp - syn, szp - szn) * state.invSpacing * 0.5f; - } + const float sxp = tex3D(state.texObj, px.x, px.y, px.z); + const float sxn = tex3D(state.texObj, nx.x, nx.y, nx.z); + const float syp = tex3D(state.texObj, py.x, py.y, py.z); + const float syn = tex3D(state.texObj, ny.x, ny.y, ny.z); + const float szp = tex3D(state.texObj, pz.x, pz.y, pz.z); + const float szn = tex3D(state.texObj, nz.x, nz.y, nz.z); - return value; + return vec3(sxp - sxn, syp - syn, szp - szn) * state.invSpacing * 0.5f; } } // namespace visrtx diff --git a/devices/rtx/device/spatial_field/StructuredRegularSampler_ptx.cu b/devices/rtx/device/spatial_field/StructuredRegularSampler_ptx.cu index 07aedc3e1..7072ca9a3 100644 --- a/devices/rtx/device/spatial_field/StructuredRegularSampler_ptx.cu +++ b/devices/rtx/device/spatial_field/StructuredRegularSampler_ptx.cu @@ -30,28 +30,15 @@ */ // OptiX direct-callable entry points for the structured regular sampler. -// Implementations live in StructuredRegularSamplerInline.h. +// Only the Woodcock-body callables are exposed via the SBT; value/normal/init +// sampling stays inline (StructuredRegularSamplerInline.h) since the bodies +// below resolve sampleValue/sampleNormal by ADL on the concrete state type. #include "StructuredRegularSamplerInline.h" #include "gpu/volumeIntegrationDetail.h" using namespace visrtx; -VISRTX_CALLABLE void __direct_callable__initStructuredRegularSampler( - VolumeSamplingState *samplerState, const SpatialFieldGPUData *field) -{ - initStructuredRegularSampler(samplerState->structuredRegular, field); -} - -VISRTX_CALLABLE float __direct_callable__sampleStructuredRegular( - const VolumeSamplingState *samplerState, - const vec3 *location, - vec3 *gradient) -{ - return sampleStructuredRegular( - samplerState->structuredRegular, location, gradient); -} - // Woodcock-body callables — single variant, no macro fan-out. VISRTX_CALLABLE float __direct_callable__sampleDistanceStructuredRegular( ScreenSample *ss, @@ -73,14 +60,7 @@ VISRTX_CALLABLE float __direct_callable__sampleDistanceStructuredRegular( *albedo, *extinction, *didScatter, - normal, - [] __device__(const StructuredRegularSamplerState &s, - const SpatialFieldGPUData &, - const vec3 &p) { return sampleStructuredRegular(s, &p, nullptr); }, - [] __device__(const StructuredRegularSamplerState &s, - const SpatialFieldGPUData &, - const vec3 &p, - vec3 &g) { return sampleStructuredRegular(s, &p, &g); }); + normal); } VISRTX_CALLABLE void @@ -92,14 +72,8 @@ __direct_callable__ratioTrackTransmittanceStructuredRegular( SamplerStateBox stateBox; auto &samplerState = stateBox.state; initStructuredRegularSampler(samplerState, &field); - detail::woodcockRatioTrackTransmittance(*ss, - *hit, - samplerState, - field, - *attenuation, - [] __device__(const StructuredRegularSamplerState &s, - const SpatialFieldGPUData &, - const vec3 &p) { return sampleStructuredRegular(s, &p, nullptr); }); + detail::woodcockRatioTrackTransmittance( + *ss, *hit, samplerState, field, *attenuation); } VISRTX_CALLABLE float __direct_callable__rayMarchVolumeStructuredRegular( @@ -115,19 +89,6 @@ VISRTX_CALLABLE float __direct_callable__rayMarchVolumeStructuredRegular( SamplerStateBox stateBox; auto &samplerState = stateBox.state; initStructuredRegularSampler(samplerState, &field); - return detail::latticeRayMarchVolume(*ss, - *hit, - samplerState, - field, - color, - normal, - *opacity, - invSamplingRate, - [] __device__(const StructuredRegularSamplerState &s, - const SpatialFieldGPUData &, - const vec3 &p) { return sampleStructuredRegular(s, &p, nullptr); }, - [] __device__(const StructuredRegularSamplerState &s, - const SpatialFieldGPUData &, - const vec3 &p, - vec3 &g) { return sampleStructuredRegular(s, &p, &g); }); + return detail::latticeRayMarchVolume( + *ss, *hit, samplerState, field, color, normal, *opacity, invSamplingRate); } diff --git a/devices/rtx/device/spatial_field/space_skipping/UniformGrid.cu b/devices/rtx/device/spatial_field/space_skipping/UniformGrid.cu index 4814d90c5..f616b53b2 100644 --- a/devices/rtx/device/spatial_field/space_skipping/UniformGrid.cu +++ b/devices/rtx/device/spatial_field/space_skipping/UniformGrid.cu @@ -38,6 +38,14 @@ namespace visrtx { +// Defined in UniformGridCustom.cu (a separate TU so it can include the custom +// sampler dispatch header). Default stream; the cudaFree below synchronizes. +void launchCustomValueRanges(box1 *valueRanges, + ivec3 mcDims, + box3 objectBounds, + const SpatialFieldGPUData *dSfgd, + cudaStream_t stream); + __global__ void computeOpacityBoundsGPU(float2 *opacityBounds, const box1 *valueRanges, cudaTextureObject_t colorMap, @@ -210,6 +218,10 @@ void UniformGrid::computeValueRanges(const SpatialFieldGPUData &sfgd) case SbtCallableEntryPoints::SpatialFieldSamplerNvdbRectilinearFloat: LAUNCH_BUILD_GRID(NvdbRectilinearSpatialFieldAccessor); break; + case SbtCallableEntryPoints::SpatialFieldSamplerCustom: + launchCustomValueRanges( + m_valueRanges, m_dims, m_objectBounds, sfgdDevice, /*stream=*/0); + break; default: break; } diff --git a/devices/rtx/device/spatial_field/space_skipping/UniformGridCustom.cu b/devices/rtx/device/spatial_field/space_skipping/UniformGridCustom.cu new file mode 100644 index 000000000..94f443ef5 --- /dev/null +++ b/devices/rtx/device/spatial_field/space_skipping/UniformGridCustom.cu @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: BSD-3-Clause + */ + +// Per-macrocell value-range grid for custom spatial fields. A plain CUDA +// kernel cannot optixDirectCall the custom SBT sampler, but the user sample +// function is an inline __device__ function reachable from any TU that includes +// the dispatch header — so this TU re-expands VISRTX_CUSTOM_SAMPLE_DISPATCH +// directly. Compiled into the library unconditionally; only active when the +// custom-field provider's CMake defines VISRTX_CUSTOM_SAMPLERS_HEADER / +// VISRTX_CUSTOM_FIELD_DATA_HEADER on the library target (else a stub). + +#include +#include +#include "UniformGrid.h" +#include "gpu/gpu_decl.h" +#include "gpu/gpu_math.h" +#include "gpu/gpu_objects.h" + +#ifdef VISRTX_CUSTOM_FIELD_DATA_HEADER +#include VISRTX_CUSTOM_FIELD_DATA_HEADER +#endif +#ifdef VISRTX_CUSTOM_SAMPLERS_HEADER +#include VISRTX_CUSTOM_SAMPLERS_HEADER +#endif + +namespace visrtx { + +#ifdef VISRTX_CUSTOM_SAMPLERS_HEADER + +namespace { + +constexpr int CUSTOM_VALUE_RANGE_SUPERSAMPLE = 4; // S; S^3 samples per cell (fallback) + +// Only the no-dispatch supersample fallback (the kernel's #else below) samples +// the field directly; guard the helper to that case so it isn't compiled +// (and flagged unreferenced) when a value-range dispatch is provided. +#if !defined(VISRTX_CUSTOM_VALUE_RANGE_DISPATCH) \ + && !defined(VISRTX_CUSTOM_GLOBAL_VALUE_RANGE_DISPATCH) +VISRTX_DEVICE float sampleCustomValue( + const CustomFieldData &data, const vec3 &P) +{ +#ifdef VISRTX_CUSTOM_SAMPLE_DISPATCH + VISRTX_CUSTOM_SAMPLE_DISPATCH(data, P) +#else + return 0.0f; +#endif +} +#endif + +#ifdef VISRTX_CUSTOM_VALUE_RANGE_DISPATCH +// Field-supplied conservative value interval over an object-space AABB. Both +// ends are real bounds; the volume cannot vanish or leak. +VISRTX_DEVICE box1 customCellValueRange( + const CustomFieldData &data, const vec3 &boxLo, const vec3 &boxHi) +{ + VISRTX_CUSTOM_VALUE_RANGE_DISPATCH(data, boxLo, boxHi) +} +#elif defined(VISRTX_CUSTOM_GLOBAL_VALUE_RANGE_DISPATCH) +// Field-supplied conservative interval over the whole domain. Constant per cell +// (no space skipping) but never wrong. +VISRTX_DEVICE box1 customGlobalValueRange(const CustomFieldData &data) +{ + VISRTX_CUSTOM_GLOBAL_VALUE_RANGE_DISPATCH(data) +} +#endif + +// Per macrocell, emit a conservative value interval over the cell's object-space +// AABB. Single pass — no lower-bound fabrication needed. +__global__ void customCellRangeGPU(box1 *valueRanges, + ivec3 mcDims, + box3 objectBounds, + const SpatialFieldGPUData *sfgd, + int S) +{ + size_t threadID = blockIdx.x * size_t(blockDim.x) + threadIdx.x; + size_t numMCs = size_t(mcDims.x) * mcDims.y * mcDims.z; + if (threadID >= numMCs) + return; + + ivec3 mcID(threadID % mcDims.x, + threadID / mcDims.x % mcDims.y, + threadID / (size_t(mcDims.x) * mcDims.y)); + + const vec3 ext = objectBounds.upper - objectBounds.lower; + const vec3 normLo = vec3(mcID) / vec3(mcDims); + const vec3 normHi = vec3(mcID + ivec3(1)) / vec3(mcDims); + const vec3 objLo = objectBounds.lower + normLo * ext; + const vec3 objHi = objectBounds.lower + normHi * ext; + + const CustomFieldData &data = sfgd->data.custom; + +#ifdef VISRTX_CUSTOM_VALUE_RANGE_DISPATCH + (void)S; + valueRanges[threadID] = customCellValueRange(data, objLo, objHi); +#elif defined(VISRTX_CUSTOM_GLOBAL_VALUE_RANGE_DISPATCH) + (void)S; + valueRanges[threadID] = customGlobalValueRange(data); +#else +#warning \ + "Custom field defines samplers but no value-range dispatch; macrocell " \ + "bounds are best-effort point-supersampled and MAY mis-bound the true " \ + "field (volume can render too transparent / dark / vanish). Define " \ + "VISRTX_CUSTOM_VALUE_RANGE_DISPATCH (tight per-AABB) or " \ + "VISRTX_CUSTOM_GLOBAL_VALUE_RANGE_DISPATCH (conservative constant) for a " \ + "correctness guarantee." + // Best-effort: point-supersample real lo AND hi (no longer clamped to 0). + const vec3 cellExt = objHi - objLo; + float lo = std::numeric_limits::infinity(); + float hi = -std::numeric_limits::infinity(); + for (int iz = 0; iz < S; ++iz) + for (int iy = 0; iy < S; ++iy) + for (int ix = 0; ix < S; ++ix) { + const vec3 frac((ix + 0.5f) / S, (iy + 0.5f) / S, (iz + 0.5f) / S); + const float v = sampleCustomValue(data, objLo + frac * cellExt); + if (!isnan(v) && !isinf(v)) { + lo = fminf(lo, v); + hi = fmaxf(hi, v); + } + } + valueRanges[threadID] = (lo <= hi) + ? box1{lo, hi} + : box1{std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}; +#endif +} + +} // namespace + +void launchCustomValueRanges(box1 *valueRanges, + ivec3 mcDims, + box3 objectBounds, + const SpatialFieldGPUData *dSfgd, + cudaStream_t stream) +{ + size_t numMCs = size_t(mcDims.x) * mcDims.y * mcDims.z; + if (numMCs == 0) + return; + const int threads = 256; + const int blocks = int(iDivUp(int64_t(numMCs), threads)); + customCellRangeGPU<<>>( + valueRanges, mcDims, objectBounds, dSfgd, CUSTOM_VALUE_RANGE_SUPERSAMPLE); +} + +#else // no custom field configured: write empty sentinel everywhere + +namespace { +__global__ void customEmptyGPU(box1 *valueRanges, size_t numMCs) +{ + size_t threadID = blockIdx.x * size_t(blockDim.x) + threadIdx.x; + if (threadID >= numMCs) + return; + valueRanges[threadID].lower = std::numeric_limits::infinity(); + valueRanges[threadID].upper = -std::numeric_limits::infinity(); +} +} // namespace + +void launchCustomValueRanges(box1 *valueRanges, + ivec3 mcDims, + box3 /*objectBounds*/, + const SpatialFieldGPUData * /*dSfgd*/, + cudaStream_t stream) +{ + size_t numMCs = size_t(mcDims.x) * mcDims.y * mcDims.z; + if (numMCs == 0) + return; + const int threads = 256; + const int blocks = int(iDivUp(int64_t(numMCs), threads)); + customEmptyGPU<<>>(valueRanges, numMCs); +} + +#endif + +} // namespace visrtx diff --git a/tsd/apps/interactive/demos/customField/CMakeLists.txt b/tsd/apps/interactive/demos/customField/CMakeLists.txt index c0aaf35a0..383647f54 100644 --- a/tsd/apps/interactive/demos/customField/CMakeLists.txt +++ b/tsd/apps/interactive/demos/customField/CMakeLists.txt @@ -38,6 +38,13 @@ if (TARGET CustomFieldSampler_ptx) ) endif() +## UniformGridCustom.cu (core library source) needs the same dispatch headers +## to build per-cell majorants for the custom field. +target_compile_definitions(anari_library_visrtx PRIVATE + VISRTX_CUSTOM_FIELD_DATA_HEADER="WeightedPointsFieldData.h" + VISRTX_CUSTOM_SAMPLERS_HEADER="WeightedPointsFieldDispatch.h" +) + ## --------------------------------------------------------------------------- ## 2. Build the demo application ## --------------------------------------------------------------------------- diff --git a/tsd/apps/interactive/demos/customField/README.md b/tsd/apps/interactive/demos/customField/README.md index 1cf27c5b3..1e916c582 100644 --- a/tsd/apps/interactive/demos/customField/README.md +++ b/tsd/apps/interactive/demos/customField/README.md @@ -20,17 +20,35 @@ build time: handles parameter parsing (`commitParameters`), host-to-device data transfer (`finalize`), and spatial metadata (`bounds`, `stepSize`). -3. **Write the GPU sampler**: A `__device__` function that evaluates the field - at an arbitrary 3D point, dispatched via the +3. **Write the GPU sampler**: A `__host__ __device__` function that evaluates + the field at an arbitrary 3D point, dispatched via the `VISRTX_CUSTOM_SAMPLE_DISPATCH` macro. -4. **Register at static init**: A small registration file calls +4. **Supply a conservative value range** (optional but recommended): A function + returning a `{lo, hi}` interval that bounds the field value, used by VisRTX + to build a per-macrocell space-skipping grid for delta tracking. Two hooks + are available — define exactly one: + + | Macro | Signature | Trade-off | + |-------|-----------|-----------| + | `VISRTX_CUSTOM_VALUE_RANGE_DISPATCH(data, boxLo, boxHi)` | `box1` over an object-space AABB | Tight per-cell bounds → best space skipping | + | `VISRTX_CUSTOM_GLOBAL_VALUE_RANGE_DISPATCH(data)` | `box1` over the whole domain | Constant bound, zero extra cost → no space skipping | + + Without either hook the engine falls back to point-supersampling each cell, + which can mis-bound the field (the volume may render too dark or vanish). See + *Space skipping & majorants* below. + +5. **Register at static init**: A small registration file calls `visrtx::registerCustomField("subtypeName", factory)`, which inserts the type into the `SpatialFieldRegistry`. The ANARI device discovers it at runtime when `anariNewSpatialField(device, "subtypeName")` is called. -All four pieces are compiled into `libanari_library_visrtx.so` via CMake -`target_sources`, keeping the core VisRTX codebase unchanged. +The sample and value-range macros, plus their `__device__` helpers, live in a +single dispatch header (`WeightedPointsFieldDispatch.h`) that CMake wires into +the core library via the `VISRTX_CUSTOM_FIELD_DATA_HEADER` and +`VISRTX_CUSTOM_SAMPLERS_HEADER` compile definitions. All pieces are compiled +into `libanari_library_visrtx.so` via CMake `target_sources`, keeping the core +VisRTX codebase unchanged. ## The Weighted Points Field @@ -62,6 +80,27 @@ This gives O(log N) sampling cost with controllable quality via two parameters: | `sigma` | Gaussian kernel width (Å). Controls how "blobby" each atom appears. Auto-computed from median nearest-neighbor distance. | | `cutoff` | LOD distance threshold (Å). Nodes farther than this from the sample point are approximated by their aggregate. Auto-computed from domain diagonal. | +### Space skipping & majorants + +VisRTX renders volumes with delta tracking, which needs a conservative `{lo, +hi}` value interval over each region it steps through. For custom fields this +comes from one of the two value-range hooks described in step 4: + +- **`VISRTX_CUSTOM_VALUE_RANGE_DISPATCH`** — per-AABB interval, enables tight + space skipping. Used by this demo via `ValueRangeWeightedPoints.cuh`. +- **`VISRTX_CUSTOM_GLOBAL_VALUE_RANGE_DISPATCH`** — single domain-wide interval, + simpler to implement but provides no per-cell empty-space culling. + +The weighted points field implements the per-AABB variant by summing, over *all* +octree nodes, each node's largest Gaussian contribution within the AABB +(evaluated at the box point closest to the node center). + +This is provably conservative — the sampler's value at any point is a sum over +an LOD cut, which is a *subset* of all nodes, so the all-nodes sum can only +over-estimate. The result is a per-macrocell `{0, hi}` grid that lets the +renderer skip empty space without ever under-bounding the field. See +`fields/samplers/ValueRangeWeightedPoints.cuh` for the derivation. + ### GPU data layout The octree is serialized into two flat arrays for GPU consumption: @@ -71,6 +110,10 @@ The octree is serialized into two flat arrays for GPU consumption: - **indices** (`int32×2 per node`): `[childBegin, childEnd)`: index range of children in the values/indices arrays. Leaves have `(0, 0)`. +The `WeightedPointsFieldData` struct also carries the precomputed `1/(2σ²)` +factor and the conservative global `maxValue` used as the value-range fallback +when the octree is empty. + ## PDB File Support The demo can load atomic coordinates from **Protein Data Bank (PDB)** files, @@ -86,6 +129,12 @@ tsdDemoCustomField --pdb /path/to/structure.pdb Or without arguments for a random point cloud. +The ImGui controls panel exposes `sigma`/`cutoff`, the transfer function, and an +**Animation** section that perturbs the points over time (amplitude expressed in +multiples of the median nearest-neighbor distance, so motion scales with the +data). The octree is rebuilt each frame via a fast path that keeps blob size +stable across the animation. + ### Why weighted points for molecular data? Traditional molecular viewers render atoms as discrete spheres or stick models. @@ -110,11 +159,14 @@ The weighted points field offers a complementary visualization: customField/ ├── fields/ │ ├── WeightedPointsFieldData.h # GPU data struct (shared CPU/GPU) -│ ├── WeightedPointsFieldDispatch.h # OptiX sampler dispatch macro +│ ├── WeightedPointsFieldDispatch.h # Sample + value-range dispatch macros +│ ├── samplers/ +│ │ ├── SampleWeightedPoints.cuh # __host__ __device__ field evaluator +│ │ └── ValueRangeWeightedPoints.cuh # Conservative per-AABB value interval │ ├── WeightedPointsField.h/cpp # Host-side ANARI object │ └── RegisterWeightedPointsField.cpp # Static registration ├── WeightedPointsOctree.h/cpp # CPU octree builder -├── WeightedPointsControls.h/cpp # ImGui UI panel + PDB loader +├── WeightedPointsControls.h/cpp # ImGui UI panel + PDB loader + animation ├── tsdDemoCustomField.cpp # Application entry point ├── CMakeLists.txt # Build configuration └── README.md # This file diff --git a/tsd/apps/interactive/demos/customField/WeightedPointsControls.cpp b/tsd/apps/interactive/demos/customField/WeightedPointsControls.cpp index 545344d15..4822d7a5e 100644 --- a/tsd/apps/interactive/demos/customField/WeightedPointsControls.cpp +++ b/tsd/apps/interactive/demos/customField/WeightedPointsControls.cpp @@ -116,8 +116,8 @@ void WeightedPointsControls::buildUI() "Amplitude", &m_perturbAmplitude, 0.01f, 0.f, 20.f, "%.3f"); if (ImGui::IsItemHovered()) { ImGui::SetTooltip( - "Maximum displacement of points from their original positions " - "(in data units). Scaled by median nearest-neighbor distance."); + "Maximum point displacement, in multiples of the median " + "nearest-neighbor distance (1 ~= one neighbor spacing)."); } ImGui::DragFloat( @@ -156,6 +156,30 @@ void WeightedPointsControls::createScene() auto *layer = scene.defaultLayer(); layer->clear(); + // Tear down the previous scene before rebuilding (Regenerate / source change). + // Remove the parameter-holding objects (volume, field) before the arrays they + // reference, so their PARAMETER uses are released against live pool slots. + // Leaving them stranded lets their stale parameter handles later decrement + // reused array slots (the "decUseCount on zero use count" warnings). + if (m_volume) + scene.removeObject(m_volume.data()); + if (m_field) + scene.removeObject(m_field.data()); + if (m_light) + scene.removeObject(m_light); + if (m_valuesArrayRef) + scene.removeObject(m_valuesArrayRef.data()); + if (m_indicesArrayRef) + scene.removeObject(m_indicesArrayRef.data()); + if (m_colorArrayRef) + scene.removeObject(m_colorArrayRef.data()); + m_volume = {}; + m_field = {}; + m_light = nullptr; + m_valuesArrayRef = {}; + m_indicesArrayRef = {}; + m_colorArrayRef = {}; + generatePoints(); m_originalPoints = m_rawPoints; m_animationSetup = false; @@ -217,6 +241,7 @@ void WeightedPointsControls::createScene() } colorArray->unmap(); m_volume->setParameterObject("color", *colorArray); + m_colorArrayRef = colorArray; // tracked so it is released on the next rebuild layer->root()->insert_first_child({layer, m_volume}); @@ -241,6 +266,29 @@ void WeightedPointsControls::generatePoints() m_rawPoints = generateRandomUniform(); } +void WeightedPointsControls::swapFieldArrays( + const std::vector &values, const std::vector &indices) +{ + auto &scene = appContext()->tsd.scene; + + auto newValues = scene.createArray(ANARI_FLOAT32, values.size()); + newValues->setData(values.data()); + m_field->setParameterObject("values", *newValues); + + auto newIndices = scene.createArray(ANARI_INT32, indices.size()); + newIndices->setData(indices.data()); + m_field->setParameterObject("indices", *newIndices); + + // Release the previous arrays now that the field references the new ones — + // otherwise each animation frame leaks a values/indices pair into the scene. + if (m_valuesArrayRef) + scene.removeObject(m_valuesArrayRef.data()); + if (m_indicesArrayRef) + scene.removeObject(m_indicesArrayRef.data()); + m_valuesArrayRef = newValues; + m_indicesArrayRef = newIndices; +} + void WeightedPointsControls::rebuildField() { if (!m_field || !m_volume || m_rawPoints.empty()) @@ -279,9 +327,11 @@ void WeightedPointsControls::rebuildField() domainMax[c] = bmax[c] + pad; } - float sigma = m_sigmaOverride; - if (sigma <= 0.f) { - // Estimate typical inter-atom spacing via sampled nearest-neighbor distances + // Median nearest-neighbor distance: drives auto-sigma AND the animation + // amplitude scale (perturbPoints displaces in multiples of it), so it is + // computed regardless of whether sigma is overridden. + float medianNN = 1.f; + { size_t nSamp = std::min(n, (size_t)200); size_t sStep = std::max((size_t)1, n / nSamp); std::vector nnDists; @@ -302,11 +352,12 @@ void WeightedPointsControls::rebuildField() nnDists.push_back(std::sqrt(best)); } std::sort(nnDists.begin(), nnDists.end()); - float medianNN = nnDists.empty() - ? 1.f - : nnDists[nnDists.size() / 2]; - sigma = medianNN * 3.f; + if (!nnDists.empty()) + medianNN = nnDists[nnDists.size() / 2]; } + m_medianNN = medianNN; + + float sigma = (m_sigmaOverride > 0.f) ? m_sigmaOverride : medianNN * 3.f; float cutoff = m_cutoffOverride; if (cutoff <= 0.f) { @@ -323,14 +374,9 @@ void WeightedPointsControls::rebuildField() "domainMax", tsd::math::float3(domainMax[0], domainMax[1], domainMax[2])); m_field->setParameter("sigma", sigma); m_field->setParameter("cutoff", cutoff); + m_effectiveSigma = sigma; - auto valuesArray = scene.createArray(ANARI_FLOAT32, flatValues.size()); - valuesArray->setData(flatValues.data()); - m_field->setParameterObject("values", *valuesArray); - - auto indicesArray = scene.createArray(ANARI_INT32, flatIndices.size()); - indicesArray->setData(flatIndices.data()); - m_field->setParameterObject("indices", *indicesArray); + swapFieldArrays(flatValues, flatIndices); float inv2s2 = 1.f / (2.f * sigma * sigma); float maxFieldVal = 0.f; @@ -363,7 +409,7 @@ void WeightedPointsControls::rebuildField() void WeightedPointsControls::rebuildFieldFast() { - if (!m_field || m_rawPoints.empty()) + if (!m_field || !m_volume || m_rawPoints.empty()) return; auto &scene = appContext()->tsd.scene; @@ -378,20 +424,46 @@ void WeightedPointsControls::rebuildFieldFast() WeightedPointsOctree octree; octree.build(points, 8, 12); - - const auto &flatValues = octree.flatValues(); - const auto &flatIndices = octree.flatIndices(); - if (octree.numNodes() == 0) return; - auto valuesArray = scene.createArray(ANARI_FLOAT32, flatValues.size()); - valuesArray->setData(flatValues.data()); - m_field->setParameterObject("values", *valuesArray); + // Track the perturbed point bounds so the field domain — and thus the + // volume's rendered AABB — follows the animation. Without this the points + // drift outside the original (fixed) domain and the volume goes blank. + const float *bmin = octree.boundsMin(); + const float *bmax = octree.boundsMax(); + float dmin[3], dmax[3]; + for (int c = 0; c < 3; c++) { + float pad = std::max(0.1f, (bmax[c] - bmin[c]) * 0.05f); + dmin[c] = bmin[c] - pad; + dmax[c] = bmax[c] + pad; + } + m_field->setParameter( + "domainMin", tsd::math::float3(dmin[0], dmin[1], dmin[2])); + m_field->setParameter( + "domainMax", tsd::math::float3(dmax[0], dmax[1], dmax[2])); + + swapFieldArrays(octree.flatValues(), octree.flatIndices()); - auto indicesArray = scene.createArray(ANARI_INT32, flatIndices.size()); - indicesArray->setData(flatIndices.data()); - m_field->setParameterObject("indices", *indicesArray); + // Keep the TF range matched to the current field max so opacity doesn't wash + // out as points spread/cluster. Sigma is held fixed (m_effectiveSigma) so the + // blob size stays stable across frames. + const float inv2s2 = 1.f / (2.f * m_effectiveSigma * m_effectiveSigma); + float maxFieldVal = 0.f; + size_t nSamples = std::min(n, (size_t)50); + size_t step = std::max((size_t)1, n / nSamples); + for (size_t i = 0; i < n; i += step) { + const float *p = m_rawPoints.data() + i * 4; + float val = 0.f; + for (size_t j = 0; j < n; j++) { + const float *q = m_rawPoints.data() + j * 4; + float dx = p[0] - q[0], dy = p[1] - q[1], dz = p[2] - q[2]; + val += q[3] * std::exp(-(dx * dx + dy * dy + dz * dz) * inv2s2); + } + maxFieldVal = std::max(maxFieldVal, val); + } + tsd::math::float2 valueRange(0.f, maxFieldVal > 0.f ? maxFieldVal : 1.f); + m_volume->setParameter("valueRange", ANARI_FLOAT32_BOX1, &valueRange); scene.signalLayerStructureChanged(scene.defaultLayer()); } @@ -419,6 +491,11 @@ void WeightedPointsControls::perturbPoints(float t) m_rawPoints.resize(m_originalPoints.size()); + // Amplitude is in multiples of the median nearest-neighbor distance, so the + // motion scales with the point cloud (a value of ~1 wiggles each point by + // roughly one neighbor spacing) instead of flinging them off in raw units. + const float amp = m_perturbAmplitude * m_medianNN; + for (size_t i = 0; i < n; i++) { size_t base = i * 4; float seed = static_cast(i); @@ -427,9 +504,9 @@ void WeightedPointsControls::perturbPoints(float t) float dy = std::cos(phase + seed * 2.3f) * std::sin(seed * 0.7f); float dz = std::sin(phase + seed * 3.1f) * std::cos(seed * 1.1f); - m_rawPoints[base + 0] = m_originalPoints[base + 0] + dx * m_perturbAmplitude; - m_rawPoints[base + 1] = m_originalPoints[base + 1] + dy * m_perturbAmplitude; - m_rawPoints[base + 2] = m_originalPoints[base + 2] + dz * m_perturbAmplitude; + m_rawPoints[base + 0] = m_originalPoints[base + 0] + dx * amp; + m_rawPoints[base + 1] = m_originalPoints[base + 1] + dy * amp; + m_rawPoints[base + 2] = m_originalPoints[base + 2] + dz * amp; m_rawPoints[base + 3] = m_originalPoints[base + 3]; } diff --git a/tsd/apps/interactive/demos/customField/WeightedPointsControls.h b/tsd/apps/interactive/demos/customField/WeightedPointsControls.h index d81f1af87..5f4cb3b33 100644 --- a/tsd/apps/interactive/demos/customField/WeightedPointsControls.h +++ b/tsd/apps/interactive/demos/customField/WeightedPointsControls.h @@ -7,6 +7,7 @@ #include #include #include +#include #include namespace tsd::demo { @@ -27,6 +28,10 @@ struct WeightedPointsControls : public tsd::ui::imgui::Window void setupAnimation(); void perturbPoints(float t); + // Swap the field's values/indices arrays, releasing the previous pair. + void swapFieldArrays( + const std::vector &values, const std::vector &indices); + std::vector generateRandomUniform(); std::vector loadPDB(const std::string &path); @@ -34,7 +39,7 @@ struct WeightedPointsControls : public tsd::ui::imgui::Window int m_numPoints{2000}; float m_sigmaOverride{0.f}; float m_cutoffOverride{0.f}; - float m_perturbAmplitude{15.f}; + float m_perturbAmplitude{1.f}; // in units of median nearest-neighbor distance float m_perturbFrequency{2.f}; std::string m_pdbPath; @@ -45,6 +50,18 @@ struct WeightedPointsControls : public tsd::ui::imgui::Window tsd::scene::VolumeRef m_volume; tsd::scene::Object *m_light{nullptr}; + // Current field data arrays, tracked so the previous ones can be released + // each rebuild (otherwise every animation frame leaks a new pair). + tsd::scene::ArrayRef m_valuesArrayRef; + tsd::scene::ArrayRef m_indicesArrayRef; + tsd::scene::ArrayRef m_colorArrayRef; + // Gaussian width chosen by the last full rebuild; reused (not recomputed) by + // the per-frame fast path so blob size stays stable across the animation. + float m_effectiveSigma{1.f}; + // Median nearest-neighbor distance from the last full rebuild; the animation + // amplitude is expressed in multiples of this so motion scales with the data. + float m_medianNN{1.f}; + std::vector m_rawPoints; std::vector m_originalPoints; }; diff --git a/tsd/apps/interactive/demos/customField/fields/WeightedPointsField.cpp b/tsd/apps/interactive/demos/customField/fields/WeightedPointsField.cpp index 5a520fa9a..9c77c4f0b 100644 --- a/tsd/apps/interactive/demos/customField/fields/WeightedPointsField.cpp +++ b/tsd/apps/interactive/demos/customField/fields/WeightedPointsField.cpp @@ -90,12 +90,24 @@ void WeightedPointsField::finalize() if (cutoff <= 0.f) cutoff = 5.f * sigma; + // Conservative global upper bound for the dispatch fallback: any LOD cut is a + // subset of nodes, so summing the weights upper-bounds the field. (A single + // max weight is NOT valid: overlapping Gaussians sum beyond any single one.) + float maxValue = 0.f; + if (m_valuesArray) { + const float *hv = m_valuesArray->beginAs(); + const size_t nn = m_valuesArray->totalSize() / 4; + for (size_t i = 0; i < nn; i++) + maxValue += std::max(0.f, hv[i * 4 + 3]); + } + m_deviceData.values = m_dValues; m_deviceData.indices = m_dIndices; m_deviceData.numNodes = m_numNodes; m_deviceData.sigma = sigma; m_deviceData.inv2SigmaSq = 1.f / (2.f * sigma * sigma); m_deviceData.cutoff = cutoff; + m_deviceData.maxValue = maxValue; m_deviceData.domainMin = m_domainMin; m_deviceData.domainMax = m_domainMax; diff --git a/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldData.h b/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldData.h index 717ceeedb..767dec536 100644 --- a/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldData.h +++ b/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldData.h @@ -18,6 +18,7 @@ struct WeightedPointsFieldData float sigma; // Gaussian width float inv2SigmaSq; // 1 / (2 * sigma^2), precomputed float cutoff; // initial LOD distance + float maxValue; // conservative global upper bound (fallback) visrtx::vec3 domainMin; visrtx::vec3 domainMax; }; diff --git a/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldDispatch.h b/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldDispatch.h index 9e9598b94..24625eb98 100644 --- a/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldDispatch.h +++ b/tsd/apps/interactive/demos/customField/fields/WeightedPointsFieldDispatch.h @@ -4,6 +4,7 @@ #pragma once #include "WeightedPointsFieldData.h" +#include "samplers/ValueRangeWeightedPoints.cuh" #include "samplers/SampleWeightedPoints.cuh" #define VISRTX_CUSTOM_SAMPLE_DISPATCH(data, P) \ @@ -14,3 +15,15 @@ default: \ return 0.0f; \ } + +#define VISRTX_CUSTOM_VALUE_RANGE_DISPATCH(data, boxLo, boxHi) \ + switch (data.subType) { \ + case visrtx::DEMO_WEIGHTED_POINTS_FIELD_TYPE: \ + return valueRangeWeightedPoints( \ + *reinterpret_cast(data.fieldData), \ + boxLo, boxHi); \ + default: /* unknown subtype: global conservative interval, never vanish */ \ + return visrtx::box1{0.f, \ + reinterpret_cast( \ + data.fieldData)->maxValue}; \ + } diff --git a/tsd/apps/interactive/demos/customField/fields/samplers/ValueRangeWeightedPoints.cuh b/tsd/apps/interactive/demos/customField/fields/samplers/ValueRangeWeightedPoints.cuh new file mode 100644 index 000000000..60e16578a --- /dev/null +++ b/tsd/apps/interactive/demos/customField/fields/samplers/ValueRangeWeightedPoints.cuh @@ -0,0 +1,37 @@ +// Copyright 2025-2026 NVIDIA Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../WeightedPointsFieldData.h" +#include "gpu/gpu_math.h" + +// Conservative value interval over an object-space AABB for a non-negative +// weighted-points field. The lower bound is always 0; the upper bound sums each +// node's largest Gaussian contribution over the box (evaluated at the box point +// closest to the node center). Conservative because the sampler's value at any +// point is a sum over an LOD cut, which is a SUBSET of all nodes, and each +// node's box maximum upper-bounds its contribution there. +inline __host__ __device__ visrtx::box1 valueRangeWeightedPoints( + const WeightedPointsFieldData &field, + const visrtx::vec3 &boxLo, + const visrtx::vec3 &boxHi) +{ + if (!field.values || field.numNodes <= 0) + return visrtx::box1{0.f, field.maxValue}; + + const float k = field.inv2SigmaSq; // = 1/(2 sigma^2) > 0 + float hi = 0.f; + for (int i = 0; i < field.numNodes; ++i) { + const float w = field.values[i * 4 + 3]; + if (w <= 0.f) + continue; + const visrtx::vec3 c(field.values[i * 4 + 0], + field.values[i * 4 + 1], + field.values[i * 4 + 2]); + // Closest box point to the node center -> largest exp(-d^2 k) over the box. + const visrtx::vec3 dNear = c - glm::clamp(c, boxLo, boxHi); + hi += w * expf(-glm::dot(dNear, dNear) * k); + } + return visrtx::box1{0.f, hi}; +}