diff --git a/devices/rtx/device/frame/Frame.cu b/devices/rtx/device/frame/Frame.cu
index 8c5ac3742..42e182989 100644
--- a/devices/rtx/device/frame/Frame.cu
+++ b/devices/rtx/device/frame/Frame.cu
@@ -73,6 +73,115 @@ __device__ bool resolveSample(uint32_t idx,
   return divisor > 0;
 }
 
+// One-sided (upper) trimmed mean -- the TRIM mode. A robust per-pixel estimator:
+// a trimmed mean (Tukey 1962; Huber 1981) whose outlier set is chosen by a
+// Grubbs / generalized-ESD test (Grubbs 1969; Rosner 1983), accumulated online
+// with Welford (Welford 1962); an a-posteriori per-pixel sample-outlier rejector
+// in the DeCoro et al. 2010 lineage. See the commit message for the full mapping
+// and the two deliberate deviations from textbook ESD.
+//
+// `sum` is the running total of all `n` samples (the colorAccumulation value,
+// undivided); `topK` holds the `trim` brightest samples the pixel saw (rgb in
+// xyz, luminance in w, w < 0 = empty); `lum` carries the pixel's luminance
+// Welford (mean in mean.x, M2 in m2.x).
+//
+// A sample is dropped when its luminance exceeds the threshold mean + k*stddev,
+// with the spread (stddev) estimated over the BASE samples -- the bulk with the
+// tracked brightest removed. This is the ESD masking fix: one spike otherwise
+// inflates its own sigma enough to exempt itself, so a moderate k never fires.
+// Leaving the candidates out of the scale keeps the threshold tied to the
+// well-behaved bulk so a genuine spike stands out even at large k.
+//
+// Two refinements keep this from darkening the image at low spp -- the one real
+// drawback of the plain version, where with few samples the tracked brightest
+// are a large fraction, the base mean collapses below the true level, and even
+// legitimate bright samples get dropped:
+//  * the threshold is centred on the FULL mean, not the base mean, so it cannot
+//    fall below the true level when the base excludes the bright fraction;
+//  * the number of samples actually dropped is capped at ~n/4, so at low spp at
+//    most the single most extreme spike is removed (it ramps to the full trim
+//    as samples accumulate) -- a large trim fraction can no longer gut the
+//    estimate. The brightest tracked samples are dropped first.
+// Clean pixels have nothing above the threshold and resolve to the exact mean;
+// the dropped fraction -> 0 with spp (consistent estimator).
+//
+// Returns vec3(0) for a pixel with no samples; the plain mean sum / n when
+// n < 3, when fewer than two base samples remain, or when nothing exceeds the
+// threshold; otherwise the mean of the samples that were kept.
+__device__ vec3 resolveTrimmed(
+    const vec4 *topK, vec3 sum, const PixelLumStats &lum, int trim, float kSigma)
+{
+  constexpr int MAX_TRIM = 8;
+  if (trim > MAX_TRIM)
+    trim = MAX_TRIM;
+
+  const int n = int(lum.n);
+  if (n <= 0)
+    return vec3(0.f);
+  if (n < 3)
+    return sum / float(n);
+
+  // Full-distribution luminance moments, from the Welford accumulators. All
+  // second moments below stay centred on meanFull: rebuilding the raw sum of
+  // squares (M2 + n * mean^2) and subtracting it again loses the spread to fp32
+  // rounding once the mean is large against it, which can collapse sigma to 0
+  // and pull the threshold down onto the mean of a clean bright pixel.
+  const float meanFull = lum.mean.x;
+  const float m2Full = lum.m2.x;
+
+  // Gather the tracked brightest, sorted by luminance descending (<= 8 elems),
+  // and accumulate their deviations from meanFull to subtract from the base
+  // spread estimate.
+  float topW[MAX_TRIM];
+  vec3 topRGB[MAX_TRIM];
+  float devTop = 0.0f, devTop2 = 0.0f; // sums of (w - meanFull), its square
+  int v = 0;
+  for (int i = 0; i < trim; ++i) {
+    if (topK[i].w < 0.0f)
+      continue; // empty slot
+    const float w = topK[i].w;
+    const vec3 rgb = vec3(topK[i]);
+    const float d = w - meanFull;
+    devTop += d;
+    devTop2 += d * d;
+    // Insertion sort: shift dimmer entries down to make room for w.
+    int j = v - 1;
+    for (; j >= 0 && topW[j] < w; --j) {
+      topW[j + 1] = topW[j];
+      topRGB[j + 1] = topRGB[j];
+    }
+    topW[j + 1] = w;
+    topRGB[j + 1] = rgb;
+    ++v;
+  }
+  const int nB = n - v;
+  if (nB < 2) // too few base samples to estimate a spread
+    return sum / float(n);
+
+  // Base M2 about the base mean, by the shift identity
+  //   sum_B (x - meanB)^2 = sum_B (x - meanFull)^2 - nB * (meanB - meanFull)^2
+  // where sum_B (x - meanFull)^2 = M2 - devTop2 and, because deviations from
+  // meanFull sum to zero over all samples, meanB - meanFull = -devTop / nB.
+  const float baseM2 =
+      fmaxf(m2Full - devTop2 - devTop * devTop / float(nB), 0.0f);
+  const float sigmaB = sqrtf(baseM2 / float(nB - 1));
+  const float threshold = meanFull + kSigma * sigmaB;
+
+  // Drop at most ~n/4 samples (>=1), brightest first, ramping the trim fraction
+  // in with the sample count.
+  const int maxDrop = min(min(trim, n - 1), max(1, n / 4));
+  vec3 dropSum(0.f);
+  int dropCount = 0;
+  for (int i = 0; i < v && dropCount < maxDrop; ++i) {
+    if (topW[i] <= threshold)
+      break; // sorted descending: nothing below is above the threshold either
+    dropSum += topRGB[i];
+    ++dropCount;
+  }
+
+  return (sum - dropSum) / float(n - dropCount);
+}
+
 __global__ void prepareDenoiseInputs(const vec4 *__restrict__ accumColor,
     const vec3 *__restrict__ accumAlbedo,
     const vec3 *__restrict__ accumNormal,
@@ -82,7 +191,11 @@ __global__ void prepareDenoiseInputs(const vec4 *__restrict__ accumColor,
     uvec2 size,
     int frameID,
     int checkerboardID,
-    bool fireflyFilter)
+    FireflyFilterMode fireflyFilterMode,
+    const vec4 *__restrict__ trimTopK,
+    const PixelLumStats *__restrict__ lumStats,
+    int trim,
+    float sigma)
 {
   const uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
   if (idx >= size.x * size.y)
@@ -101,8 +214,16 @@ __global__ void prepareDenoiseInputs(const vec4 *__restrict__ accumColor,
 
   const float invDivisor = 1.0f / float(divisor);
   vec4 c =
accumColor[srcIdx] * invDivisor; - if (fireflyFilter) + if (fireflyFilterMode == FireflyFilterMode::TONEMAP) { c = detail::inverseTonemap(c); + } else if (fireflyFilterMode == FireflyFilterMode::TRIM && trimTopK) { + c = vec4(resolveTrimmed(trimTopK + size_t(srcIdx) * trim, + vec3(accumColor[srcIdx]), + lumStats[srcIdx], + trim, + sigma), + c.a); + } denoiseInput[idx] = c; if (denoiseAlbedo) @@ -125,7 +233,11 @@ void launchPrepareDenoiseInputs(const vec4 *accumColor, uvec2 size, int frameID, int checkerboardID, - bool fireflyFilter, + FireflyFilterMode fireflyFilterMode, + const vec4 *trimTopK, + const PixelLumStats *lumStats, + int trim, + float sigma, cudaStream_t stream) { const uint32_t nPixels = size.x * size.y; @@ -140,7 +252,11 @@ void launchPrepareDenoiseInputs(const vec4 *accumColor, size, frameID, checkerboardID, - fireflyFilter); + fireflyFilterMode, + trimTopK, + lumStats, + trim, + sigma); } __global__ void compositeBackground(vec4 *__restrict__ accumColor, @@ -152,7 +268,9 @@ __global__ void compositeBackground(vec4 *__restrict__ accumColor, FrameFormat format, int frameID, int checkerboardID, - bool isDenoised) + bool isDenoised, + const vec4 *__restrict__ trimTopK, + const PixelLumStats *__restrict__ lumStats) { const uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= size.x * size.y) @@ -176,8 +294,18 @@ __global__ void compositeBackground(vec4 *__restrict__ accumColor, rendered.a = accumColor[sourceIdx].a / float(divisor); } else { rendered = accumColor[sourceIdx] / float(divisor); - if (renderer.fireflyFilter) + if (renderer.fireflyFilterMode == FireflyFilterMode::TONEMAP) { rendered = detail::inverseTonemap(rendered); + } else if (renderer.fireflyFilterMode == FireflyFilterMode::TRIM + && trimTopK) { + rendered = vec4( + resolveTrimmed(trimTopK + size_t(sourceIdx) * renderer.fireflyFilterTrim, + vec3(accumColor[sourceIdx]), + lumStats[sourceIdx], + renderer.fireflyFilterTrim, + renderer.fireflyFilterSigma), + rendered.a); + } } 
const vec2 uv = (vec2(px, py) + 0.5f) * invSize; @@ -217,6 +345,8 @@ void launchCompositeBackground(vec4 *accumColor, int frameID, int checkerboardID, bool isDenoised, + const vec4 *trimTopK, + const PixelLumStats *lumStats, cudaStream_t stream) { const uint32_t nPixels = size.x * size.y; @@ -231,7 +361,9 @@ void launchCompositeBackground(vec4 *accumColor, format, frameID, checkerboardID, - isDenoised); + isDenoised, + trimTopK, + lumStats); } } // anonymous namespace @@ -332,6 +464,7 @@ void Frame::finalize() m_instIDBuffer.resize(channelInstID ? numPixels() : 0); m_accumColor.reserve(numPixels() * sizeof(vec4)); + m_lumStats.reserve(numPixels() * sizeof(PixelLumStats)); if (channelAlbedo) m_accumAlbedo.reserve(numPixels() * sizeof(vec3)); else @@ -358,6 +491,7 @@ void Frame::finalize() } hd.fb.buffers.colorAccumulation = m_accumColor.ptrAs(); + hd.fb.buffers.lumStats = m_lumStats.ptrAs(); hd.fb.buffers.depth = channelDepth ? m_depthBuffer.dataDevice() : nullptr; hd.fb.buffers.primID = channelPrimID ? m_primIDBuffer.dataDevice() : nullptr; @@ -497,6 +631,18 @@ void Frame::renderFrame() m_camera->populateFrameData(hd.camera, hd.fb.size); hd.world = m_world->gpuData(); + // The TRIM top-k buffer is `trim` times the color buffer, so allocate it only + // while that mode is active. trim is a renderer parameter, hence resolved + // here rather than in finalize(). newFrame() clears it on accumulation reset. 
+ if (hd.renderer.fireflyFilterMode == FireflyFilterMode::TRIM) { + m_trimTopK.reserve( + numPixels() * size_t(hd.renderer.fireflyFilterTrim) * sizeof(vec4)); + hd.fb.buffers.trimTopK = m_trimTopK.ptrAs(); + } else { + m_trimTopK.reset(); + hd.fb.buffers.trimTopK = nullptr; + } + hd.registry.samplers = state.registry.samplers.devicePtr(); hd.registry.geometries = state.registry.geometries.devicePtr(); hd.registry.materials = state.registry.materials.devicePtr(); @@ -545,7 +691,11 @@ void Frame::renderFrame() hd.fb.size, hd.fb.frameID, hd.fb.checkerboardID, - hd.renderer.fireflyFilter, + hd.renderer.fireflyFilterMode, + m_trimTopK.ptrAs(), + m_lumStats.ptrAs(), + hd.renderer.fireflyFilterTrim, + hd.renderer.fireflyFilterSigma, state.stream); m_denoiser.launch(); @@ -560,6 +710,8 @@ void Frame::renderFrame() hd.fb.frameID, hd.fb.checkerboardID, /*isDenoised=*/true, + m_trimTopK.ptrAs(), + m_lumStats.ptrAs(), state.stream); m_denoiser.convertOutput(); @@ -577,6 +729,8 @@ void Frame::renderFrame() hd.fb.frameID, hd.fb.checkerboardID, /*isDenoised=*/false, + m_trimTopK.ptrAs(), + m_lumStats.ptrAs(), state.stream); } @@ -905,6 +1059,15 @@ void Frame::newFrame() thrust::fill_n(thrust::device_pointer_cast(m_accumColor.ptrAs()), numPixels(), vec4(0.0f)); + thrust::fill_n(thrust::device_pointer_cast(m_lumStats.ptrAs()), + numPixels(), + PixelLumStats{vec3(0.0f), vec3(0.0f), 0.0f}); + if (hd.renderer.fireflyFilterMode == FireflyFilterMode::TRIM + && m_trimTopK.ptrAs()) { + thrust::fill_n(thrust::device_pointer_cast(m_trimTopK.ptrAs()), + numPixels() * size_t(hd.renderer.fireflyFilterTrim), + vec4(0.0f, 0.0f, 0.0f, -1.0f)); // w<0 marks an empty top-k slot + } // Conditionally initialize other buffers if (channelDepth) { diff --git a/devices/rtx/device/frame/Frame.h b/devices/rtx/device/frame/Frame.h index ba8236cde..1685250ee 100644 --- a/devices/rtx/device/frame/Frame.h +++ b/devices/rtx/device/frame/Frame.h @@ -121,6 +121,8 @@ struct Frame : public helium::BaseFrame, public 
DeviceObject
   DeviceBuffer m_accumColor; // vec4
   DeviceBuffer m_accumAlbedo; // vec3
   DeviceBuffer m_accumNormal; // vec3
+  DeviceBuffer m_lumStats; // PixelLumStats: per-channel Welford + count
+  DeviceBuffer m_trimTopK; // TRIM mode: trim*vec4 brightest samples per pixel
 
   // Per-pixel pre-denoise estimates. Keeping these separate from pixelBuffer
   // avoids the denoiser reading its own previous output on non-rendered
diff --git a/devices/rtx/device/gpu/gpu_objects.h b/devices/rtx/device/gpu/gpu_objects.h
index c9a40242d..e81d74453 100644
--- a/devices/rtx/device/gpu/gpu_objects.h
+++ b/devices/rtx/device/gpu/gpu_objects.h
@@ -779,6 +779,15 @@ enum class BackgroundMode
   IMAGE
 };
 
+// Per-sample firefly suppression strategy applied during accumulation.
+enum class FireflyFilterMode
+{
+  NONE, // accumulate raw radiance (unbiased)
+  TONEMAP, // reversible Reinhard round-trip (legacy; dims highlights)
+  CLAMP, // per-pixel, per-channel Welford clamp (biased: clips outliers)
+  TRIM // adaptive upper-trimmed mean (consistent; near-unbiased at high spp)
+};
+
 union RendererBackgroundGPUData
 {
   glm::vec4 color;
@@ -797,7 +806,10 @@ struct RendererGPUData
   float occlusionDistance;
   bool cullTriangleBF;
   bool premultiplyBackground;
-  bool fireflyFilter; // enable internal tonemapping during sample accumulation
+  FireflyFilterMode fireflyFilterMode; // per-sample outlier suppression strategy
+  float fireflyFilterSigma; // CLAMP/TRIM: k in threshold = mean + k*stddev
+  int fireflyFilterWarmup; // CLAMP mode: samples before the Welford cap engages
+  int fireflyFilterTrim; // TRIM mode: count of brightest samples tracked/trimmed
   glm::vec4 cutPlane; // cutting plane (nx,ny,nz,d); disabled when all zero (GPU
   // default)
 };
@@ -812,9 +824,29 @@ enum class FrameFormat
   UNKNOWN
 };
 
+// Per-pixel running Welford statistics for firefly suppression, tracked per RGB
+// channel so a single-channel (chromatic) outlier is caught even when its
+// luminance is unremarkable. `n` is the shared sample count — kept here because
+// checkerboarding makes frameID a poor proxy for "how many samples this pixel
+// has seen". CLAMP uses all three channels; TRIM uses only the luminance Welford
+// in channel x and reads n as its sample divisor.
+struct PixelLumStats
+{
+  glm::vec3 mean; // per-channel running mean (TRIM: luminance mean in x)
+  glm::vec3 m2; // per-channel sum of squared deltas (TRIM: luminance M2 in x)
+  float n; // sample count (shared across channels and with TRIM)
+};
+
 struct FrameBuffers
 {
   glm::vec4 *colorAccumulation;
+  PixelLumStats *lumStats;
+  // TRIM mode: the `trim` brightest samples seen per pixel, laid out
+  // [pixel*trim + slot] as (rgb in xyz, luminance in w; w < 0 marks an empty
+  // slot). At resolve the trimmed mean removes the outliers among these from
+  // the running colorAccumulation sum, so it needs only O(trim) memory per
+  // pixel. The sample count lives in lumStats->n.
+  glm::vec4 *trimTopK;
   float *depth;
   uint32_t *primID;
   uint32_t *objID;
diff --git a/devices/rtx/device/gpu/gpu_tonemap.h b/devices/rtx/device/gpu/gpu_tonemap.h
index 25f876158..e59077387 100644
--- a/devices/rtx/device/gpu/gpu_tonemap.h
+++ b/devices/rtx/device/gpu/gpu_tonemap.h
@@ -29,9 +29,11 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-// Tonemap helpers — safe to include from both PTX and regular CUDA sources.
-// gpu_util.h includes and cannot be used from Frame.cu;
-// this header provides the subset needed by the compositing kernel.
+// Reversible Reinhard-on-max tonemap, used by the "tonemap" firefly filter
+// mode: each sample is compressed before accumulation and the average is
+// expanded back afterward. Safe to include from both PTX and regular CUDA
+// sources — gpu_util.h is device-code only and cannot be included from
+// the Frame.cu compositing kernel, so the inverse lives here on its own.
#pragma once
 
 #include "gpu_math.h"
diff --git a/devices/rtx/device/gpu/gpu_util.h b/devices/rtx/device/gpu/gpu_util.h
index 7c7711145..a5e91c800 100644
--- a/devices/rtx/device/gpu/gpu_util.h
+++ b/devices/rtx/device/gpu/gpu_util.h
@@ -34,6 +34,7 @@
 #include "cameraCreateRay.h"
 #include "gpu/gpu_debug.h"
 #include "gpu_objects.h"
+#include "gpu_tonemap.h"
 #include "shadingState.h"
 // optix
 #include
@@ -45,7 +46,6 @@
 #include
 // cuda
 #include
-#include "gpu_tonemap.h"
 
 #ifndef __CUDACC__
 #error "gpu_util.h can only be included in device code"
@@ -489,6 +489,91 @@ VISRTX_DEVICE void setPixelIds(const FramebufferGPUData &fb,
   }
 }
 
+// Per-pixel, per-channel Welford soft-clamp for firefly suppression.
+//
+// Two regimes keyed on the pixel's own sample count n:
+//  * warmup (n < warmupSamples): per-channel stats are too sparse for a
+//    variance-based cap, so clamp each channel to a generous multiple of its
+//    running mean. This catches a firefly from the 2nd sample on (the 1st has
+//    no prior).
+//  * steady (n >= warmupSamples): clamp each channel to mean + k*stddev.
+//
+// In both regimes the Welford stats are updated from the *clamped* value: a
+// sample below its cap contributes its true value (so σ tracks the well-behaved
+// bulk), but a sample above the cap contributes only the capped value. This is
+// the base-excluding threshold — letting raw outliers into the stats lets one
+// firefly inflate σ enough to raise its own future cap, so a moderate k never
+// fires (the σ-inflation trap). Feeding the clamped value bounds that inflation,
+// which is what lets k drop to a value that actually bites. The cost is that a
+// genuinely legitimate >kσ excursion on a high-variance pixel is clipped and
+// cannot grow the cap — unavoidable for a per-pixel online clamp, which is why
+// CLAMP is the deliberately-aggressive, biased mode.
+//
+// Each channel is clamped independently to its own cap, so a chromatic
+// (single-channel) outlier is caught even when its luminance is unremarkable,
+// without a near-zero channel dragging the whole (saturated-color) pixel dark.
+//
+// Non-finite channels never reach the statistics: NaN and -inf (and +inf while
+// no usable cap exists yet) are replaced by the channel's running mean; +inf
+// otherwise takes the cap like any other outlier. A single such sample would
+// otherwise turn mean/m2 into NaN, after which no `L > cap` test is ever true
+// and the clamp stays disabled for that pixel.
+//
+// Returns `color` with its rgb clamped (alpha untouched) and writes the updated
+// statistics to lumStatsBuf[idx]; a null lumStatsBuf returns `color` unchanged.
+VISRTX_DEVICE vec4 fireflyClamp(
+    PixelLumStats *lumStatsBuf, uint32_t idx, vec4 color, float kSigma, int warmupSamples)
+{
+  constexpr float kWarmupCapFactor = 8.0f; // warmup cap = factor * running mean
+
+  if (!lumStatsBuf)
+    return color;
+
+  PixelLumStats s = lumStatsBuf[idx];
+  const vec3 orig = vec3(color);
+  const bool hasPrior = s.n >= 1.0f; // 1st sample has no prior to cap against
+  const bool warm = s.n < float(warmupSamples);
+
+  vec3 clamped = orig;
+  for (int k = 0; k < 3; ++k) {
+    const float L = orig[k];
+    const bool finite = !(glm::isnan(L) || glm::isinf(L));
+    if (finite && (!hasPrior || !(L > 0.0f)))
+      continue; // nothing to cap against, or nothing to cap
+
+    float cap = 0.0f; // <= 0 means "no usable cap"
+    if (hasPrior) {
+      if (warm) {
+        cap = kWarmupCapFactor * s.mean[k];
+      } else {
+        // Needs >=2 samples for a sample variance; with warmupSamples==1 the
+        // steady branch is reachable at n==1, where m2/(n-1) is 0/0.
+        const float variance =
+            s.n > 1.0f ? fmaxf(s.m2[k] / (s.n - 1.0f), 0.0f) : 0.0f;
+        cap = s.mean[k] + kSigma * sqrtf(variance);
+      }
+    }
+
+    if (!finite)
+      clamped[k] = (L > 0.0f && cap > 0.0f) ? cap : s.mean[k];
+    else if (cap > 0.0f && L > cap)
+      clamped[k] = cap;
+  }
+
+  // Welford update from the clamped value in both regimes: a within-cap sample
+  // updates with its true value, an outlier only with the bounded cap value.
+  const float n = s.n + 1.0f;
+  for (int k = 0; k < 3; ++k) {
+    const float delta = clamped[k] - s.mean[k];
+    s.mean[k] += delta / n;
+    s.m2[k] += delta * (clamped[k] - s.mean[k]);
+  }
+  s.n = n;
+  lumStatsBuf[idx] = s;
+
+  return vec4(clamped, color.a);
+}
+
 VISRTX_DEVICE void accumPixelSample(const FrameGPUData &frame,
     const uvec2 &pixel,
     const vec4 &color,
@@ -498,9 +583,60 @@ VISRTX_DEVICE void accumPixelSample(const FrameGPUData &frame,
   const auto &fb = frame.fb;
   const uint32_t idx = detail::pixelIndex(fb, pixel);
 
-  detail::accumValue(fb.buffers.colorAccumulation,
-      idx,
-      frame.renderer.fireflyFilter ?
detail::tonemap(color) : color); + vec4 c; + switch (frame.renderer.fireflyFilterMode) { + case FireflyFilterMode::TONEMAP: + c = detail::tonemap(color); + break; + case FireflyFilterMode::CLAMP: + c = fireflyClamp(fb.buffers.lumStats, + idx, + color, + frame.renderer.fireflyFilterSigma, + frame.renderer.fireflyFilterWarmup); + break; + case FireflyFilterMode::TRIM: + // Accumulate the raw sample (colorAccumulation keeps the running sum) while + // tracking the `trim` brightest samples this pixel has seen and a luminance + // Welford. The trimmed mean at resolve removes only the tracked samples + // that a base-excluding threshold flags as outliers, so clean pixels drop + // nothing (exact mean) and the dropped fraction -> 0 with spp. + if (fb.buffers.trimTopK) { + const int trim = frame.renderer.fireflyFilterTrim; + const float L = luminance(vec3(color)); + // A non-finite sample poisons the Welford mean/variance and drives the + // resolve threshold to inf, so nothing ever trims and the pixel stays + // non-finite forever. Drop it outright -- the very firefly TRIM exists to + // suppress must not survive into colorAccumulation. 
+      if (glm::isnan(L) || glm::isinf(L))
+        return;
+      vec4 *slots = fb.buffers.trimTopK + size_t(idx) * trim;
+      int minSlot = 0;
+      float minL = slots[0].w;
+      for (int i = 1; i < trim; ++i) {
+        if (slots[i].w < minL) {
+          minL = slots[i].w;
+          minSlot = i;
+        }
+      }
+      if (L > minL)
+        slots[minSlot] = vec4(vec3(color), L);
+
+      PixelLumStats &s = fb.buffers.lumStats[idx];
+      const float n = s.n + 1.f;
+      const float delta = L - s.mean.x;
+      s.mean.x += delta / n;
+      s.m2.x += delta * (L - s.mean.x);
+      s.n = n;
+    }
+    c = color;
+    break;
+  default:
+    c = color;
+    break;
+  }
+
+  detail::accumValue(fb.buffers.colorAccumulation, idx, c);
   detail::accumValue(fb.buffers.albedo, idx, albedo);
   detail::accumValue(fb.buffers.normal, idx, normal);
 }
diff --git a/devices/rtx/device/renderer/Renderer.cpp b/devices/rtx/device/renderer/Renderer.cpp
index 571a34532..23b160d44 100644
--- a/devices/rtx/device/renderer/Renderer.cpp
+++ b/devices/rtx/device/renderer/Renderer.cpp
@@ -76,6 +76,20 @@ using MaterialRecord = SbtRecord;
 // Helper functions ///////////////////////////////////////////////////////////
 
+// Map the fireflyFilterMode string to the enum by exact, case-sensitive match;
+// anything else (including "none") resolves to NONE. The default-when-unset is
+// chosen by the caller and passed in as the string, so this is a pure lookup.
+static FireflyFilterMode parseFireflyFilterMode(const std::string &mode)
+{
+  if (mode == "tonemap")
+    return FireflyFilterMode::TONEMAP;
+  if (mode == "clamp")
+    return FireflyFilterMode::CLAMP;
+  if (mode == "trim")
+    return FireflyFilterMode::TRIM;
+  return FireflyFilterMode::NONE;
+}
+
 static std::string longestBeginningMatch(
     const std::string_view &first, const std::string_view &second)
 {
@@ -164,8 +178,21 @@ void Renderer::commitParameters()
       (denoiseMode == "colorAlbedo" || denoiseMode == "colorAlbedoNormal");
   m_denoiseNormal = (denoiseMode == "colorAlbedoNormal");
 
-  m_fireflyFilter =
-      getParam("fireflyFilter", getParam("tonemap", true));
+  // Default to tonemap, matching the pre-enum behaviour.
+  m_fireflyFilterMode =
+      parseFireflyFilterMode(getParamString("fireflyFilterMode", "tonemap"));
+  // Default k=8: the cap is mean + k*stddev with σ estimated from clamped
+  // samples (outliers excluded), so a moderate k bites without one firefly
+  // raising its own threshold. Lower clamps harder (more bias); raise for more
+  // energy fidelity at the cost of leaking brighter fireflies.
+  m_fireflyFilterSigma =
+      std::max(0.f, getParam("fireflyFilterSigma", 8.f));
+  m_fireflyFilterWarmup = std::max(1, getParam("fireflyFilterWarmup", 4));
+  // TRIM tracks this many of the brightest samples per pixel and trims the ones
+  // a base-excluding threshold flags as outliers. Small: per-pixel memory is
+  // trim*vec4 and a handful covers the firefly count of any one pixel.
+ m_fireflyFilterTrim = + std::clamp(getParam("fireflyFilterTrim", 4), 1, 8); m_sampleLimit = getParam("sampleLimit", 128); m_cullTriangleBF = getParam("cullTriangleBackfaces", false); m_volumeSamplingRate = @@ -208,7 +235,10 @@ void Renderer::populateFrameData(FrameGPUData &fd) const fd.renderer.ambientIntensity = m_ambientIntensity; fd.renderer.occlusionDistance = m_occlusionDistance; fd.renderer.cullTriangleBF = m_cullTriangleBF; - fd.renderer.fireflyFilter = m_fireflyFilter; + fd.renderer.fireflyFilterMode = m_fireflyFilterMode; + fd.renderer.fireflyFilterSigma = m_fireflyFilterSigma; + fd.renderer.fireflyFilterWarmup = m_fireflyFilterWarmup; + fd.renderer.fireflyFilterTrim = m_fireflyFilterTrim; fd.renderer.inverseVolumeSamplingRate = 1.f / m_volumeSamplingRate; fd.renderer.numIterations = std::max(m_spp, 1); fd.renderer.premultiplyBackground = m_premultiplyBackground; @@ -1121,9 +1151,9 @@ void Renderer::cleanup() } } -bool Renderer::filterFireflies() const +FireflyFilterMode Renderer::fireflyFilterMode() const { - return m_fireflyFilter; + return m_fireflyFilterMode; } } // namespace visrtx diff --git a/devices/rtx/device/renderer/Renderer.h b/devices/rtx/device/renderer/Renderer.h index 07ea51883..8455ad52e 100644 --- a/devices/rtx/device/renderer/Renderer.h +++ b/devices/rtx/device/renderer/Renderer.h @@ -74,7 +74,7 @@ struct Renderer : public Object bool denoise() const; bool denoiseUsingAlbedo() const; bool denoiseUsingNormal() const; - bool filterFireflies() const; + FireflyFilterMode fireflyFilterMode() const; int sampleLimit() const; static Renderer *createInstance( @@ -90,8 +90,10 @@ struct Renderer : public Object bool m_denoise{false}; bool m_denoiseAlbedo{false}; bool m_denoiseNormal{false}; - bool m_fireflyFilter{ - true}; // enable internal tonemapping during sample accumulation + FireflyFilterMode m_fireflyFilterMode{FireflyFilterMode::TONEMAP}; + float m_fireflyFilterSigma{8.f}; // CLAMP mode: k in cap = mean + k*stddev + int 
m_fireflyFilterWarmup{4}; // CLAMP mode: samples before the Welford cap + int m_fireflyFilterTrim{4}; // TRIM mode: brightest samples tracked/tested (ESD bound) int m_sampleLimit{0}; bool m_cullTriangleBF{false}; bool m_premultiplyBackground{false}; diff --git a/devices/rtx/device/visrtx_renderer_fast.json b/devices/rtx/device/visrtx_renderer_fast.json index d93c65951..f85ae9557 100644 --- a/devices/rtx/device/visrtx_renderer_fast.json +++ b/devices/rtx/device/visrtx_renderer_fast.json @@ -59,13 +59,50 @@ "description": "mode controlling buffers given to the denoiser" }, { - "name": "fireflyFilter", + "name": "fireflyFilterMode", "types": [ - "ANARI_BOOL" + "ANARI_STRING" ], "tags": [], - "default": true, - "description": "suppress fireflies via reversible tonemapping before accumulation" + "default": "tonemap", + "values": [ + "none", + "tonemap", + "clamp", + "trim" + ], + "description": "removes bright speckle (firefly) noise: none (off), tonemap (aggressive; strongest suppression but can dim the image), clamp (moderate; caps only the brightest outlier samples), trim (gentlest; energy-preserving, discards only detected outliers)" + }, + { + "name": "fireflyFilterSigma", + "types": [ + "ANARI_FLOAT32" + ], + "tags": [], + "default": 8.0, + "minimum": 0.0, + "description": "clamp/trim mode: outlier sensitivity; lower removes more aggressively" + }, + { + "name": "fireflyFilterWarmup", + "types": [ + "ANARI_INT32" + ], + "tags": [], + "default": 4, + "minimum": 1, + "description": "clamp mode: samples observed before filtering starts; higher delays filtering but estimates the cap more reliably" + }, + { + "name": "fireflyFilterTrim", + "types": [ + "ANARI_INT32" + ], + "tags": [], + "default": 4, + "minimum": 1, + "maximum": 8, + "description": "trim mode: how many outlier samples to track per pixel; higher is more robust but uses more memory" }, { "name": "premultiplyBackground", @@ -163,4 +200,4 @@ ] } ] -} \ No newline at end of file +} diff --git 
a/devices/rtx/device/visrtx_renderer_interactive.json b/devices/rtx/device/visrtx_renderer_interactive.json index 8da771214..7e45829fb 100644 --- a/devices/rtx/device/visrtx_renderer_interactive.json +++ b/devices/rtx/device/visrtx_renderer_interactive.json @@ -50,13 +50,50 @@ "description": "mode controlling buffers given to the denoiser" }, { - "name": "fireflyFilter", + "name": "fireflyFilterMode", "types": [ - "ANARI_BOOL" + "ANARI_STRING" ], "tags": [], - "default": true, - "description": "suppress fireflies via reversible tonemapping before accumulation" + "default": "tonemap", + "values": [ + "none", + "tonemap", + "clamp", + "trim" + ], + "description": "removes bright speckle (firefly) noise: none (off), tonemap (aggressive; strongest suppression but can dim the image), clamp (moderate; caps only the brightest outlier samples), trim (gentlest; energy-preserving, discards only detected outliers)" + }, + { + "name": "fireflyFilterSigma", + "types": [ + "ANARI_FLOAT32" + ], + "tags": [], + "default": 8.0, + "minimum": 0.0, + "description": "clamp/trim mode: outlier sensitivity; lower removes more aggressively" + }, + { + "name": "fireflyFilterWarmup", + "types": [ + "ANARI_INT32" + ], + "tags": [], + "default": 4, + "minimum": 1, + "description": "clamp mode: samples observed before filtering starts; higher delays filtering but estimates the cap more reliably" + }, + { + "name": "fireflyFilterTrim", + "types": [ + "ANARI_INT32" + ], + "tags": [], + "default": 4, + "minimum": 1, + "maximum": 8, + "description": "trim mode: how many outlier samples to track per pixel; higher is more robust but uses more memory" }, { "name": "premultiplyBackground", @@ -165,4 +202,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/devices/rtx/device/visrtx_renderer_quality.json b/devices/rtx/device/visrtx_renderer_quality.json index df5afee1f..4923ced69 100644 --- a/devices/rtx/device/visrtx_renderer_quality.json +++ b/devices/rtx/device/visrtx_renderer_quality.json @@ 
-50,13 +50,50 @@ "description": "mode controlling buffers given to the denoiser" }, { - "name": "fireflyFilter", + "name": "fireflyFilterMode", "types": [ - "ANARI_BOOL" + "ANARI_STRING" + ], + "tags": [], + "default": "tonemap", + "values": [ + "none", + "tonemap", + "clamp", + "trim" + ], + "description": "removes bright speckle (firefly) noise: none (off), tonemap (aggressive; strongest suppression but can dim the image), clamp (moderate; caps only the brightest outlier samples), trim (gentlest; energy-preserving, discards only detected outliers)" + }, + { + "name": "fireflyFilterSigma", + "types": [ + "ANARI_FLOAT32" ], "tags": [], - "default": true, - "description": "suppress fireflies via reversible tonemapping before accumulation" + "default": 8.0, + "minimum": 0.0, + "description": "clamp/trim mode: outlier sensitivity; lower removes more aggressively" + }, + { + "name": "fireflyFilterWarmup", + "types": [ + "ANARI_INT32" + ], + "tags": [], + "default": 4, + "minimum": 1, + "description": "clamp mode: samples observed before filtering starts; higher delays filtering but estimates the cap more reliably" + }, + { + "name": "fireflyFilterTrim", + "types": [ + "ANARI_INT32" + ], + "tags": [], + "default": 4, + "minimum": 1, + "maximum": 8, + "description": "trim mode: how many outlier samples to track per pixel; higher is more robust but uses more memory" }, { "name": "premultiplyBackground", @@ -132,4 +169,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/tsd/src/tsd/scene/Object.cpp b/tsd/src/tsd/scene/Object.cpp index b08054aae..a21f80250 100644 --- a/tsd/src/tsd/scene/Object.cpp +++ b/tsd/src/tsd/scene/Object.cpp @@ -1,8 +1,8 @@ // Copyright 2024-2026 NVIDIA Corporation // SPDX-License-Identifier: Apache-2.0 -#include "tsd/scene/Object.hpp" #include "tsd/scene/AnariHandleCache.hpp" +#include "tsd/scene/Object.hpp" #include "tsd/scene/Scene.hpp" #ifndef TSD_USE_CUDA @@ -811,7 +811,15 @@ void parseANARIObjectInfo( svs.push_back(*stringValues); if 
(!svs.empty()) { p.setStringValues(svs); - p.setValue(svs[0].c_str()); // reset default value + // Fall back to the first listed value unless the device declares a + // default that is actually one of them: feeding an out-of-list string to + // setValue leaves the value and its selection index desynced. + const char *declaredDefault = parameter->type == ANARI_STRING && defaultValue + ? static_cast(defaultValue) + : nullptr; + const bool defaultIsValid = declaredDefault + && std::find(svs.begin(), svs.end(), declaredDefault) != svs.end(); + p.setValue(defaultIsValid ? declaredDefault : svs[0].c_str()); } } }