From cb3ac76e6d952021fa70dda3f799f5b79c449aee Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Wed, 4 Mar 2026 19:14:14 +0100 Subject: [PATCH 01/42] Texture Atlases --- D3D11Engine/BaseGraphicsEngine.h | 2 + D3D11Engine/ConstantBufferStructs.h | 76 +- D3D11Engine/D3D11Engine.vcxproj | 6 + D3D11Engine/D3D11Engine.vcxproj.filters | 8 + D3D11Engine/D3D11GraphicsEngine.cpp | 1144 ++++++++++++++--- D3D11Engine/D3D11GraphicsEngine.h | 62 +- D3D11Engine/D3D11ShaderManager.cpp | 30 +- D3D11Engine/D3D11ShadowMap.cpp | 80 -- D3D11Engine/D3D11ShadowMap.h | 2 - D3D11Engine/D3D11StructuredBuffer.h | 134 ++ D3D11Engine/D3D11TextureAtlasManager.h | 274 ++++ D3D11Engine/Frustum.h | 190 ++- D3D11Engine/GothicAPI.cpp | 279 +++- D3D11Engine/GothicAPI.h | 67 +- D3D11Engine/GothicGraphicsState.h | 2 + D3D11Engine/ImGuiShim.cpp | 1 + D3D11Engine/ShaderIDs.h | 4 + D3D11Engine/Shaders/CS_CullVobs.hlsl | 130 ++ D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl | 110 ++ .../Shaders/VS_ExInstancedObjIndirect.hlsl | 217 ++++ .../VS_ExInstancedObjIndirectAtlas.hlsl | 211 +++ D3D11Engine/StaticVOBCache.cpp | 151 +++ D3D11Engine/StaticVOBCache.h | 52 + D3D11Engine/packages.config | 1 + 24 files changed, 2859 insertions(+), 374 deletions(-) create mode 100644 D3D11Engine/D3D11StructuredBuffer.h create mode 100644 D3D11Engine/D3D11TextureAtlasManager.h create mode 100644 D3D11Engine/Shaders/CS_CullVobs.hlsl create mode 100644 D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl create mode 100644 D3D11Engine/Shaders/VS_ExInstancedObjIndirect.hlsl create mode 100644 D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl create mode 100644 D3D11Engine/StaticVOBCache.cpp create mode 100644 D3D11Engine/StaticVOBCache.h diff --git a/D3D11Engine/BaseGraphicsEngine.h b/D3D11Engine/BaseGraphicsEngine.h index 174c4a39..560e683c 100644 --- a/D3D11Engine/BaseGraphicsEngine.h +++ b/D3D11Engine/BaseGraphicsEngine.h @@ -230,4 +230,6 @@ class BaseGraphicsEngine { virtual XRESULT 
UpdateRenderStates() { return XR_SUCCESS; }; virtual std::unique_ptr RecordGraphicsEvent( LPCWSTR region ) { return std::make_unique(); } + + virtual void OnWorldLoaded() {}; }; diff --git a/D3D11Engine/ConstantBufferStructs.h b/D3D11Engine/ConstantBufferStructs.h index 4bd715d3..e2de17d4 100644 --- a/D3D11Engine/ConstantBufferStructs.h +++ b/D3D11Engine/ConstantBufferStructs.h @@ -16,17 +16,71 @@ struct VobInstanceInfo { DWORD GP_Slot; }; -/** Remap-index for the static vobs */ -struct VobInstanceRemapInfo { - bool operator < ( const VobInstanceRemapInfo& b ) const { - return InstanceRemapIndex < b.InstanceRemapIndex; - } - - bool operator == ( const VobInstanceRemapInfo& o ) const { - return InstanceRemapIndex == o.InstanceRemapIndex; - } - - DWORD InstanceRemapIndex; +struct VobInstanceInfoAtlas { + XMFLOAT4X4 world; + XMFLOAT4X4 prevWorld; // Previous frame's world matrix for motion vectors + DWORD color; + float windStrenth; + float canBeAffectedByPlayer; + // Texture Atlas information, directly stored in the instance data for easy access in shader without needing an extra StructuredBuffer + int slice; + float uStart; + float vStart; + float uEnd; + float vEnd; +}; + +// Descriptor returned for use with shader +// Points to a specific slice in the Texture2DArray atlas, along with UV coordinates for sampling that slice +// this is pointed to from VobInstanceInfo GP_Slot into a StructuredBuffer, which is then indexed in the shader to get the correct slice/UVs for each instance +struct TextureDescriptor { + int slice; + float uStart; + float vStart; + float uEnd; + float vEnd; +}; + +// CPU-side lookup: maps a zCTexture* to its atlas placement +struct TextureAtlasLookup { + DXGI_FORMAT atlasFormat; + TextureDescriptor descriptor; +}; + +// Per-vob data uploaded once at world load, read by GPU cull compute shader +struct VobGPUData { + XMFLOAT3 aabbCenter; + float pad0; + XMFLOAT3 aabbExtent; + float pad1; + XMFLOAT4X4 world; + XMFLOAT4X4 prevWorld; + DWORD color; 
+ float aniModeStrength; + float canBeAffectedByPlayer; + UINT submeshStart; // index into SubmeshGPUData[] + UINT submeshCount; // how many submeshes this vob maps to + UINT pad2[3]; +}; + +// Per-submesh lookup, shared across all vobs with the same visual +struct SubmeshGPUData { + int slice; + float uStart, vStart, uEnd, vEnd; + UINT argIndex; // index into merged indirect args + UINT instanceBaseOffset; // fixed write offset in instance buffer + UINT pad; +}; + +// Constant buffer for the GPU cull compute shader +struct CullConstants { + XMFLOAT4 frustumPlanes[6]; + XMFLOAT3 cameraPosition; + float drawDistance; + float globalWindStrength; + UINT windAdvanced; + UINT numVobs; + UINT pad; }; #pragma pack (push, 1) diff --git a/D3D11Engine/D3D11Engine.vcxproj b/D3D11Engine/D3D11Engine.vcxproj index 02df1227..b50205f1 100644 --- a/D3D11Engine/D3D11Engine.vcxproj +++ b/D3D11Engine/D3D11Engine.vcxproj @@ -848,7 +848,9 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + + @@ -993,6 +995,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -1209,6 +1212,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -1298,6 +1302,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -1307,6 +1312,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + diff --git a/D3D11Engine/D3D11Engine.vcxproj.filters b/D3D11Engine/D3D11Engine.vcxproj.filters index f300b42c..6ba362b6 100644 --- a/D3D11Engine/D3D11Engine.vcxproj.filters +++ b/D3D11Engine/D3D11Engine.vcxproj.filters @@ -839,6 +839,11 @@ ZenGin\Classes + + Engine + + + @@ -1138,6 +1143,9 @@ ZenGin\Classes + + Engine + diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 78e4e94c..5d20b684 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -54,6 +54,7 @@ #include "zCOption.h" #include "RenderGraph.h" #include "RGBuilder.h" +#include 
"D3D11TextureAtlasManager.h" #ifdef BUILD_SPACER #define IS_SPACER_BUILD true @@ -97,6 +98,7 @@ static std::unique_ptr igdextDevice; static std::unique_ptr agsDevice; extern bool userHaveAMDGPU; +bool SupportTextureAtlases = false; namespace { @@ -593,6 +595,18 @@ XRESULT D3D11GraphicsEngine::Init() { Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.UseLayeredRendering = FeatureRTArrayIndexFromAnyShader; } + if (maxFeatureLevel >= D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0) { + // check amount of GPU Memory available + constexpr uint64_t GiB = 1024ull * 1024ull * 1024ull; + if ( adpDesc.DedicatedVideoMemory >= 4 * GiB ) { + // currently we just assume everything fits into memory. + // in the future we should make use of Tiled Resources, which would allow us + // to support more memory intensive features, even on less than 4GB cards, by streaming in the necessary tiles. + SupportTextureAtlases = true; + Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows = SupportTextureAtlases; + } + } + LogInfo() << "Creating ShaderManager"; ShaderManager = std::make_unique(); ShaderManager->Init(); @@ -5402,241 +5416,343 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p } if ( Engine::GAPI->GetRendererState().RendererSettings.DrawVOBs ) { - static std::vector potentialCasters; - std::vector& vobs = potentialCasters; - if (params.CascadeIndex != -1) { - auto renderQueue = ShadowMaps->GetRenderQueue( params.CascadeIndex ); - renderQueue->ProcessQueue(); - - vobs = renderQueue->GetVobs(); - } else { - static std::vector _1; - static std::vector _2; - potentialCasters.reserve(1024); - potentialCasters.clear(); - - LegacyRenderQueueProxy q(potentialCasters, _1, _2); - RndCullContext ctx; - ctx.queue = &q; - ctx.cameraPosition = Engine::GAPI->GetCameraPosition(); - ctx.stage = RenderStage::STAGE_DRAW_WORLD; - ctx.frustum = currentFrustum; - ctx.drawDistances.OutdoorVobs = 
Engine::GAPI->GetRendererState().RendererSettings.OutdoorVobDrawRadius; - ctx.drawDistances.OutdoorVobsSmall = Engine::GAPI->GetRendererState().RendererSettings.OutdoorSmallVobDrawRadius; - ctx.drawDistances.IndoorVobs = Engine::GAPI->GetRendererState().RendererSettings.IndoorVobDrawRadius; - ctx.drawDistances.VisualFX = Engine::GAPI->GetRendererState().RendererSettings.VisualFXDrawRadius; - Engine::GAPI->CollectVisibleVobs( ctx ); - } - - // clear any residue of main render pass - for ( auto const& staticMeshVisual : Engine::GAPI->GetStaticMeshVisuals() ) { - staticMeshVisual.second->StartNewFrame(); - } - for ( auto& it : vobs) { - // process any vobs only visible in this cascade - VobInstanceInfo vii = {}; - vii.world = it->WorldMatrix; - vii.prevWorld = it->HasValidPrevMatrix ? it->PrevWorldMatrix : it->WorldMatrix; - vii.color = it->GroundColor; - vii.windStrenth = 0.0f; - vii.canBeAffectedByPlayer = 0; - - zTAnimationMode aniMode = it->Vob->GetVisualAniMode(); - if ( aniMode != zVISUAL_ANIMODE_NONE ) { - vii.canBeAffectedByPlayer = (!it->Vob->GetDynColl() ? 1.0f : 0.0f); - GothicAPI::ProcessVobAnimation( it->Vob, aniMode, vii ); - } + bool drawStaticVobs = true; + if ( Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows && !m_AtlasDrawGroups.empty() ) { + // GPU indirect path: reuse DrawVOBsIndirect with the cascade/shadow frustum. + // BC1 groups render depth-only (no PS); BC2 groups use the alpha-test PS. 
+ DrawVOBsIndirect( currentFrustum, /*bindPS=*/false ); + drawStaticVobs = false; + } + + static std::vector dynamicVobCasters; + static std::vector _1; + static std::vector _2; + dynamicVobCasters.reserve( 1024 ); + dynamicVobCasters.clear(); + + LegacyRenderQueueProxy q( dynamicVobCasters, _1, _2 ); + RndCullContext ctx; + ctx.queue = &q; + ctx.cameraPosition = Engine::GAPI->GetCameraPosition(); + ctx.stage = RenderStage::STAGE_DRAW_SHADOWS; + ctx.frustum = currentFrustum; + ctx.drawDistances.OutdoorVobs = Engine::GAPI->GetRendererState().RendererSettings.OutdoorVobDrawRadius; + ctx.drawDistances.OutdoorVobsSmall = Engine::GAPI->GetRendererState().RendererSettings.OutdoorSmallVobDrawRadius; + ctx.drawDistances.IndoorVobs = 0; + ctx.drawDistances.VisualFX = 0; + Engine::GAPI->CollectVisibleVobs( ctx, (EBspTreeCollectFlags)(EBspTreeCollectFlags::COLLECT_DYNAMIC_VOBS) ); + + struct BatchableStaticVobs { + MeshVisualInfo* VisualInfo; + std::vector Instances; + uint32_t StartInstanceNum; + }; - reinterpret_cast(it->VisualInfo)->Instances.push_back( vii ); - } + if (drawStaticVobs) { + // cull the cached static vobs against the current frustum + const auto& vobs = m_StaticVobs; + std::vector outRenderQueue{}; + StaticVOBCache::CullAndGatherStaticVOBs( m_StaticVobsAABBs, vobs, currentFrustum.GetPlanes()._Elems, outRenderQueue ); + + std::sort( outRenderQueue.begin(), outRenderQueue.end(), + []( const StaticVobRenderItem& a, const StaticVobRenderItem& b ) { + return a.mvi->Visual < b.mvi->Visual; + } ); + + // Group vobs by visual and prepare instance data + std::vector batchables; + batchables.reserve( outRenderQueue.size() ); + + zCVisual* lastVisual = nullptr; + for ( auto& itm : outRenderQueue ) { + auto v = vobs[itm.instanceIndex]; + + if ( v->VisualInfo->Visual != lastVisual ) { + // New visual, reset instance data + lastVisual = v->VisualInfo->Visual; + batchables.push_back( { reinterpret_cast(v->VisualInfo), std::vector() } ); + batchables.back().Instances.reserve( 10 ); + } +
BatchableStaticVobs& batch = batchables.back(); + + MeshVisualInfo* visualInfo = batch.VisualInfo; + VobInstanceInfo vii = {}; + vii.world = v->WorldMatrix; + vii.prevWorld = v->HasValidPrevMatrix ? v->PrevWorldMatrix : v->WorldMatrix; + vii.color = v->GroundColor; + vii.windStrenth = 0.0f; + vii.canBeAffectedByPlayer = 0; + zTAnimationMode aniMode = v->Vob->GetVisualAniMode(); + if ( aniMode != zVISUAL_ANIMODE_NONE ) { + vii.canBeAffectedByPlayer = (!v->Vob->GetDynColl() ? 1.0f : 0.0f); + GothicAPI::ProcessVobAnimation( v->Vob, aniMode, vii ); + } + batch.Instances.push_back( vii ); + } - auto _ = START_TIMING( timer_labels_vobs[timerLabelIndex] ); - auto _1 = RecordGraphicsEvent( L"Shadows::DrawVOBs" ); + auto _ = START_TIMING( timer_labels_vobs[timerLabelIndex] ); + auto _1 = RecordGraphicsEvent( L"DrawVOBs" ); - size_t ByteWidth = DynamicInstancingBuffer->GetSizeInBytes(); + size_t ByteWidth = DynamicInstancingBuffer->GetSizeInBytes(); - if ( ByteWidth < sizeof( VobInstanceInfo ) * vobs.size() ) { - if ( Engine::GAPI->GetRendererState().RendererSettings.EnableDebugLog ) - LogInfo() << "Instancing buffer too small (" << ByteWidth - << "), need " << sizeof( VobInstanceInfo ) * vobs.size() - << " bytes. Recreating buffer."; + if ( ByteWidth < sizeof( VobInstanceInfo ) * vobs.size() ) { + if ( Engine::GAPI->GetRendererState().RendererSettings.EnableDebugLog ) + LogInfo() << "Instancing buffer too small (" << ByteWidth + << "), need " << sizeof( VobInstanceInfo ) * vobs.size() + << " bytes. 
Recreating buffer."; - // Buffer too small, recreate it - DynamicInstancingBuffer->Init( - nullptr, sizeof( VobInstanceInfo ) * vobs.size(), - D3D11VertexBuffer::B_VERTEXBUFFER, D3D11VertexBuffer::U_DYNAMIC, - D3D11VertexBuffer::CA_WRITE ); + // Buffer too small, recreate it + DynamicInstancingBuffer->Init( + nullptr, sizeof( VobInstanceInfo ) * vobs.size(), + D3D11VertexBuffer::B_VERTEXBUFFER, D3D11VertexBuffer::U_DYNAMIC, + D3D11VertexBuffer::CA_WRITE ); - SetDebugName( DynamicInstancingBuffer->GetShaderResourceView().Get(), "DynamicInstancingBuffer->ShaderResourceView" ); - SetDebugName( DynamicInstancingBuffer->GetVertexBuffer().Get(), "DynamicInstancingBuffer->VertexBuffer" ); - } - - std::vector activeVisuals; - activeVisuals.reserve(256); // Reserve enough memory to avoid allocations - for ( auto const& pair : Engine::GAPI->GetStaticMeshVisuals() ) { - if ( !pair.second->Instances.empty() ) { - activeVisuals.push_back(pair.second); + SetDebugName( DynamicInstancingBuffer->GetShaderResourceView().Get(), "DynamicInstancingBuffer->ShaderResourceView" ); + SetDebugName( DynamicInstancingBuffer->GetVertexBuffer().Get(), "DynamicInstancingBuffer->VertexBuffer" ); } - } - - byte* data; - UINT size; - if ( SUCCEEDED( DynamicInstancingBuffer->Map( D3D11VertexBuffer::M_WRITE_DISCARD, - reinterpret_cast(&data), &size ) ) ) { + + byte* data; + UINT size; UINT loc = 0; - for ( auto const& staticMeshVisual : activeVisuals ) { - staticMeshVisual->StartInstanceNum = loc; - memcpy( data + loc * sizeof( VobInstanceInfo ), staticMeshVisual->Instances.data(), - sizeof( VobInstanceInfo ) * staticMeshVisual->Instances.size() ); - loc += staticMeshVisual->Instances.size(); + if ( !SUCCEEDED( DynamicInstancingBuffer->Map( D3D11VertexBuffer::M_WRITE_DISCARD, + reinterpret_cast(&data), &size ) ) ) { + LogError() << "Failed to map dynamic instancing buffer for writing!"; + return; + } + for ( auto& staticMeshVisual : batchables ) { + staticMeshVisual.StartInstanceNum = loc; + memcpy( 
data + loc * sizeof( VobInstanceInfo ), staticMeshVisual.Instances.data(), + sizeof( VobInstanceInfo ) * staticMeshVisual.Instances.size() ); + loc += staticMeshVisual.Instances.size(); } DynamicInstancingBuffer->Unmap(); - } else { - LogError() << "Failed to map dynamic instancing buffer for vobs."; - } - // Apply instancing shader - SetActiveVertexShader( VShaderID::VS_ExInstancedObj ); - // SetActivePixelShader("PS_DiffuseAlphaTest"); - ActiveVS->Apply(); + // Apply instancing shader + SetActiveVertexShader( VShaderID::VS_ExInstancedObj ); + // SetActivePixelShader("PS_DiffuseAlphaTest"); + ActiveVS->Apply(); - if ( !linearDepth ) // Only unbind when not rendering linear depth - { - // Unbind PS - Context->PSSetShader( nullptr, nullptr, 0 ); - } + if ( !linearDepth ) // Only unbind when not rendering linear depth + { + // Unbind PS + Context->PSSetShader( nullptr, nullptr, 0 ); + } - GraphicsShaderConstantBuffer windBuffer = {}; - if ( ActiveVS ) { - windBuffer = ActiveVS->GetBuffer( "WindParams" ); - windBuffer.Bind(); - } + if ( ActiveVS ) { + ActiveVS->GetConstantBuffer()[1]->BindToVertexShader( 1 ); + } - XMFLOAT3 vPlayerPosition = Engine::GAPI->GetPlayerVob() ? Engine::GAPI->GetPlayerVob()->GetPositionWorld() : XMFLOAT3( 0, 0, 0 ); - g_windBuffer.playerPos = float3( vPlayerPosition.x, vPlayerPosition.y, vPlayerPosition.z ); + XMFLOAT3 vPlayerPosition = Engine::GAPI->GetPlayerVob() ? 
Engine::GAPI->GetPlayerVob()->GetPositionWorld() : XMFLOAT3( 0, 0, 0 ); + g_windBuffer.playerPos = float3( vPlayerPosition.x, vPlayerPosition.y, vPlayerPosition.z ); - UINT dynOffset[] = { 0 }; - UINT dynuStride[] = { sizeof( VobInstanceInfo ) }; + // Draw all vobs the player currently sees + for ( auto const& b : batchables ) { + if ( b.Instances.empty() ) continue; + auto staticMeshVisual = b.VisualInfo; - ID3D11Buffer* buffers[1] = { - DynamicInstancingBuffer->GetVertexBuffer().Get() - }; + g_windBuffer.minHeight = staticMeshVisual->BBox.Min.y; + g_windBuffer.maxHeight = staticMeshVisual->BBox.Max.y; - GetContext()->IASetVertexBuffers( 1, 1, buffers, dynuStride, dynOffset ); + if ( ActiveVS ) { + ActiveVS->GetConstantBuffer()[1]->UpdateBuffer( &g_windBuffer ); + } - // Sort visuals by whether they need alpha testing to minimize shader switches - if ( alphaRef > 0.0f ) { - std::sort( activeVisuals.begin(), activeVisuals.end(), [alphaRef, colorWritesEnabled]( const MeshVisualInfo* a, const MeshVisualInfo* b ) { - return a->NeedsAlphaTesting < b->NeedsAlphaTesting || (a->NeedsAlphaTesting == b->NeedsAlphaTesting && a->Visual < b->Visual); - } ); - } else { - std::sort( activeVisuals.begin(), activeVisuals.end(), [alphaRef, colorWritesEnabled]( const MeshVisualInfo* a, const MeshVisualInfo* b ) { - return a->Visual < b->Visual; - } ); - } + zCTexture* previousTx = nullptr; + for ( auto const& itt : staticMeshVisual->MeshesByTexture ) { + std::vector& mlist = staticMeshVisual->MeshesByTexture[itt.first]; + if ( mlist.empty() ) continue; + + zCTexture* tx = itt.first.Texture; + bool bindTexture = previousTx != tx + && tx + && (tx->HasAlphaChannel() || colorWritesEnabled); + + // Check for alphablend + bool blendAdd = + itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_ADD; + bool blendBlend = + itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_BLEND; + // if one part of the mesh uses blending, all do, which means that + // the mesh likely is transparent and 
can't cast shadows + if ( blendAdd || blendBlend ) { + continue; + } + } - // Draw all vobs the player currently sees - D3D11PShader* currPs = nullptr; + for ( unsigned int i = 0; i < mlist.size(); i++ ) { - for ( auto const& staticMeshVisual : activeVisuals ) { - if ( staticMeshVisual->Instances.empty() ) continue; - - g_windBuffer.minHeight = staticMeshVisual->BBox.Min.y; - g_windBuffer.maxHeight = staticMeshVisual->BBox.Max.y; + for ( unsigned int i = 0; i < mlist.size(); i++ ) { + // Bind texture + if ( bindTexture ) { + if ( alphaRef > 0.0f && tx->CacheIn( 0.6f ) == zRES_CACHED_IN ) { + tx->Bind( 0 ); + ActivePS->Apply(); + previousTx = tx; + } else + continue; + } else { + if ( !linearDepth ) // Only unbind when not rendering linear depth + { + // Unbind PS + Context->PSSetShader( nullptr, nullptr, 0 ); + } + } - windBuffer.Update( &g_windBuffer ); + MeshInfo* mi = mlist[i]; - bool doReset = true; - zCTexture* previousTx = nullptr; - for ( auto const& itt : staticMeshVisual->MeshesByTexture ) { - std::vector& mlist = staticMeshVisual->MeshesByTexture[itt.first]; - if ( mlist.empty() ) continue; - - // Check for alphablend - bool blendAdd = - itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_ADD; - bool blendBlend = - itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_BLEND; - // if one part of the mesh uses blending, all do, which means that - // the mesh likely is transparent and can't cast shadows - if ( !doReset || blendAdd || blendBlend ) { - doReset = false; - continue; + // Draw batch + DrawInstanced( mi->MeshVertexBuffer, mi->MeshIndexBuffer, + mi->Indices.size(), DynamicInstancingBuffer.get(), + sizeof( VobInstanceInfo ), b.Instances.size(), + sizeof( ExVertexStruct ), b.StartInstanceNum ); + + Engine::GAPI->GetRendererState().RendererInfo.FrameDrawnVobs += + b.Instances.size(); + } } + } + } // end else (CPU indirect path) - zCTexture* tx = itt.first.Texture; - bool bindTexture = previousTx != tx - && tx - && (tx->HasAlphaChannel() || 
colorWritesEnabled) - && alphaRef > 0.0f; + // Draw dynamic vobs (spawned at runtime, not part of m_StaticVobs or atlas) + if ( !dynamicVobCasters.empty() ) { + // Group by visual for instanced drawing - // Bind texture - if ( bindTexture ) { - if ( tx->CacheIn( 0.6f ) == zRES_CACHED_IN ) { - auto t = tx->GetSurface()->GetEngineTexture()->GetShaderResourceView().Get(); - Context->PSSetShaderResources( 0, 1, &t ); - auto nextPs = ActivePS.get(); - if ( currPs != nextPs ) { - currPs = nextPs; - ActivePS->Apply(); - } - previousTx = tx; - } else - continue; - } else { - if ( !linearDepth ) // Only unbind when not rendering linear depth - { - // Unbind PS - if ( currPs != nullptr ) { - Context->PSSetShader( nullptr, nullptr, 0 ); - currPs = nullptr; - } + std::vector dynBatches; + std::unordered_map batchIndex; + batchIndex.reserve( dynamicVobCasters.size() ); // usually single, but can be multiple + + for ( auto* v : dynamicVobCasters ) { + if ( !v->VisualInfo ) continue; + MeshVisualInfo* vi = reinterpret_cast( v->VisualInfo ); + + auto [it, inserted] = batchIndex.emplace( vi, dynBatches.size() ); + if ( inserted ) { + dynBatches.push_back( { vi, {}, 0 } ); + } + + VobInstanceInfo vii = {}; + vii.world = v->WorldMatrix; + vii.prevWorld = v->HasValidPrevMatrix ? v->PrevWorldMatrix : v->WorldMatrix; + vii.color = v->GroundColor; + vii.windStrenth = 0.0f; + vii.canBeAffectedByPlayer = 0; + zTAnimationMode aniMode = v->Vob->GetVisualAniMode(); + if ( aniMode != zVISUAL_ANIMODE_NONE ) { + vii.canBeAffectedByPlayer = (!v->Vob->GetDynColl() ? 
1.0f : 0.0f); + GothicAPI::ProcessVobAnimation( v->Vob, aniMode, vii ); + } + dynBatches[it->second].Instances.push_back( vii ); + } + + if ( !dynBatches.empty() ) { + // Ensure instancing buffer is large enough + size_t needed = dynamicVobCasters.size() * sizeof( VobInstanceInfo ); + if ( DynamicInstancingBuffer->GetSizeInBytes() < needed ) { + DynamicInstancingBuffer->Init( + nullptr, needed, + D3D11VertexBuffer::B_VERTEXBUFFER, D3D11VertexBuffer::U_DYNAMIC, + D3D11VertexBuffer::CA_WRITE ); + } + + byte* dynData; + UINT dynSize; + UINT dynLoc = 0; + if ( SUCCEEDED( DynamicInstancingBuffer->Map( D3D11VertexBuffer::M_WRITE_DISCARD, + reinterpret_cast(&dynData), &dynSize ) ) ) { + for ( auto& batch : dynBatches ) { + batch.StartInstanceNum = dynLoc; + memcpy( dynData + dynLoc * sizeof( VobInstanceInfo ), batch.Instances.data(), + sizeof( VobInstanceInfo ) * batch.Instances.size() ); + dynLoc += batch.Instances.size(); } + DynamicInstancingBuffer->Unmap(); } - for ( unsigned int i = 0; i < mlist.size(); i++ ) { + // Set up instanced vertex shader (GPU indirect path may have changed shader state) + SetActiveVertexShader( VShaderID::VS_ExInstancedObj ); + ActiveVS->Apply(); - MeshInfo* mi = mlist[i]; + if ( linearDepth ) { + SetActivePixelShader( PShaderID::PS_LinDepth ); + } else { + SetActivePixelShader( PShaderID::PS_DiffuseAlphaTest ); + Context->PSSetShader( nullptr, nullptr, 0 ); + } - // Draw batch + // Rebind PS constant buffers (GPU indirect path may have overwritten them) + ActivePS->GetConstantBuffer()[0]->UpdateBuffer( + &Engine::GAPI->GetRendererState().GraphicsState ); + ActivePS->GetConstantBuffer()[0]->BindToPixelShader( 0 ); - /* Dont re-bind buffer all the time*/ - const auto vb = mi->MeshVertexBuffer; - const auto ib = mi->MeshIndexBuffer; + GSky* dynSky = Engine::GAPI->GetSky(); + ActivePS->GetConstantBuffer()[1]->UpdateBuffer( &dynSky->GetAtmosphereCB() ); + ActivePS->GetConstantBuffer()[1]->BindToPixelShader( 1 ); - UINT offset[] = { 0 }; - UINT 
uStride[] = { sizeof( ExVertexStruct ) }; - ID3D11Buffer* buffers[1] = { - vb->GetVertexBuffer().Get() - }; - - auto numIndices = mi->Indices.size(); - const auto numInstances = staticMeshVisual->Instances.size(); - const auto startInstanceNum = staticMeshVisual->StartInstanceNum; - const auto indexOffset = 0; + InfiniteRangeConstantBuffer->BindToPixelShader( 3 ); - GetContext()->IASetVertexBuffers( 0, 1, buffers, uStride, offset ); + SetupVS_ExConstantBuffer(); - Context->IASetIndexBuffer( ib->GetVertexBuffer().Get(), VERTEX_INDEX_DXGI_FORMAT, 0 ); + if ( ActiveVS ) { + ActiveVS->GetConstantBuffer()[1]->BindToVertexShader( 1 ); + } + + XMFLOAT3 vPlayerPosition = Engine::GAPI->GetPlayerVob() ? Engine::GAPI->GetPlayerVob()->GetPositionWorld() : XMFLOAT3( 0, 0, 0 ); + g_windBuffer.playerPos = float3( vPlayerPosition.x, vPlayerPosition.y, vPlayerPosition.z ); + + for ( auto const& batch : dynBatches ) { + if ( batch.Instances.empty() ) continue; + + g_windBuffer.minHeight = batch.VisualInfo->BBox.Min.y; + g_windBuffer.maxHeight = batch.VisualInfo->BBox.Max.y; - unsigned int max = - Engine::GAPI->GetRendererState().RendererSettings.MaxNumFaces * 3; - numIndices = max != 0 ? (numIndices < max ? 
numIndices : max) : numIndices; + if ( ActiveVS ) { + ActiveVS->GetConstantBuffer()[1]->UpdateBuffer( &g_windBuffer ); + } - // Draw the batch - GetContext()->DrawIndexedInstanced( numIndices, numInstances, indexOffset, 0, - startInstanceNum ); + zCTexture* previousTx = nullptr; + for ( auto const& itt : batch.VisualInfo->MeshesByTexture ) { + const std::vector& mlist = itt.second; + if ( mlist.empty() ) continue; - Engine::GAPI->GetRendererState().RendererInfo.FrameDrawnTriangles += - (numIndices / 3) * numInstances; + zCTexture* tx = itt.first.Texture; + bool bindTexture = previousTx != tx + && tx + && (tx->HasAlphaChannel() || colorWritesEnabled); - Engine::GAPI->GetRendererState().RendererInfo.FrameDrawnVobs++; + bool blendAdd = itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_ADD; + bool blendBlend = itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_BLEND; + if ( blendAdd || blendBlend ) { + continue; // shadow pass, transparent materials shouldn't cast shadows + } + + for ( unsigned int i = 0; i < mlist.size(); i++ ) { + if ( bindTexture ) { + if ( alphaRef > 0.0f && tx->CacheIn( 0.6f ) == zRES_CACHED_IN ) { + tx->Bind( 0 ); + ActivePS->Apply(); + previousTx = tx; + } else + continue; + } else { + if ( !linearDepth ) { + Context->PSSetShader( nullptr, nullptr, 0 ); + } + } + + MeshInfo* mi = mlist[i]; + + DrawInstanced( mi->MeshVertexBuffer, mi->MeshIndexBuffer, + mi->Indices.size(), DynamicInstancingBuffer.get(), + sizeof( VobInstanceInfo ), batch.Instances.size(), + sizeof( ExVertexStruct ), batch.StartInstanceNum ); + + Engine::GAPI->GetRendererState().RendererInfo.FrameDrawnVobs += + batch.Instances.size(); + } + } } } - - // Reset visual - if ( doReset ) staticMeshVisual->StartNewFrame(); } } @@ -5775,7 +5891,20 @@ XRESULT D3D11GraphicsEngine::DrawVOBsInstanced() { { auto _ = START_TIMING( "VOBs" ); - SetDefaultStates(); + + bool needsDrawVobs = true; + if ( !m_AtlasDrawGroups.empty() ) { + Frustum cameraFrustum = 
Frustum::AlwaysContainingFrustum(); + if ( auto cam = zCCamera::GetCamera() ) { + cam->Activate(); + cameraFrustum.BuildPerspective( + XMMatrixTranspose( XMLoadFloat4x4( &cam->trafoView ) ), + XMLoadFloat4x4( &cam->trafoProjection ) ); + } + DrawVOBsIndirect( cameraFrustum ); + needsDrawVobs = false; + } + SetActivePixelShader( PShaderID::PS_Diffuse ); SetActiveVertexShader( VShaderID::VS_ExInstancedObj ); @@ -5824,7 +5953,12 @@ XRESULT D3D11GraphicsEngine::DrawVOBsInstanced() { if ( !renderSettings.FixViewFrustum || (renderSettings.FixViewFrustum && vobs.empty()) ) { - Engine::GAPI->CollectVisibleVobs( vobs, m_FrameLights, mobs ); + + EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_MUTATE; + if (!needsDrawVobs) { + collectFlags = (EBspTreeCollectFlags)(collectFlags & ~EBspTreeCollectFlags::COLLECT_VOBS); + } + Engine::GAPI->CollectVisibleVobs( vobs, m_FrameLights, mobs, CullAll, collectFlags ); } } @@ -5832,15 +5966,15 @@ XRESULT D3D11GraphicsEngine::DrawVOBsInstanced() { UpdateMorphMeshVisual(); } - if ( renderSettings.DrawVOBs ) { + if ( renderSettings.DrawVOBs && vobs.size() > 0 ) { auto _1 = Engine::GraphicsEngine->RecordGraphicsEvent( L"DrawVOBsInstanced->DrawVOBs" ); - + std::vector activeVisuals; activeVisuals.reserve( 256 ); // Reserve enough memory to avoid allocations for ( auto const& pair : Engine::GAPI->GetStaticMeshVisuals() ) { if ( !pair.second->Instances.empty() ) { activeVisuals.push_back( pair.second ); - } + } } // Create instancebuffer for this frame @@ -6098,6 +6232,162 @@ XRESULT D3D11GraphicsEngine::DrawVOBsInstanced() { return XR_SUCCESS; } +XRESULT D3D11GraphicsEngine::DrawVOBsIndirect( const Frustum& frustum, bool bindPS ) { + if ( m_AtlasDrawGroups.empty() || !m_VobGPUBuffer || !m_StaticGlobalVertexBuffer || !m_StaticGlobalIndexBuffer ) + return XR_SUCCESS; + + auto _ = RecordGraphicsEvent( L"DrawVOBsIndirect" ); + + auto& context = GetContext(); + + // --- 1. 
Reset indirect args InstanceCounts via CopyResource from template --- + context->CopyResource( m_MergedIndirectArgs->GetIndirectBuffer().Get(), + m_IndirectArgsTemplate.Get() ); + + // --- 2. Update cull constant buffer --- + extern float vobAnimation_WindStrength; + CullConstants cb = {}; + memcpy( cb.frustumPlanes, frustum.GetPlanes().data(), 6 * sizeof( XMFLOAT4 ) ); + cb.cameraPosition = Engine::GAPI->GetCameraPosition(); + cb.drawDistance = Engine::GAPI->GetRendererState().RendererSettings.OutdoorVobDrawRadius; + cb.globalWindStrength = vobAnimation_WindStrength; + cb.windAdvanced = (Engine::GAPI->GetRendererState().RendererSettings.WindQuality + == GothicRendererSettings::EWindQuality::WIND_QUALITY_ADVANCED) ? 1 : 0; + cb.numVobs = static_cast(m_StaticVobs.size()); + m_CullConstantBuffer->UpdateBuffer( &cb ); + m_CullConstantBuffer->BindToComputeShader( 0 ); + + // --- 3. Dispatch GPU cull compute shader --- + auto cullCS = ShaderManager->GetCShader( CShaderID::CS_CullVobs ); + if ( !cullCS ) + return XR_SUCCESS; + cullCS->Apply(); + + // SRV t0 = VobGPUData, t1 = SubmeshGPUData + ID3D11ShaderResourceView* srvs[2] = { + m_VobGPUBuffer->GetSRV(), + m_SubmeshGPUBuffer->GetSRV() + }; + context->CSSetShaderResources( 0, 2, srvs ); + + // UAV u0 = InstanceOutput (structured), u1 = IndirectArgs (raw byte address) + ID3D11UnorderedAccessView* uavs[2] = { + m_InstanceBufferGPU->GetUAV(), + m_MergedIndirectArgs->GetUnorderedAccessView().Get() + }; + context->CSSetUnorderedAccessViews( 0, 2, uavs, nullptr ); + + UINT numGroups = (static_cast(m_StaticVobs.size()) + 63) / 64; + context->Dispatch( numGroups, 1, 1 ); + + // Unbind CS resources + ID3D11ShaderResourceView* nullSRV[2] = { nullptr, nullptr }; + ID3D11UnorderedAccessView* nullUAV[2] = { nullptr, nullptr }; + context->CSSetShaderResources( 0, 2, nullSRV ); + context->CSSetUnorderedAccessViews( 0, 2, nullUAV, nullptr ); + context->CSSetShader( nullptr, nullptr, 0 ); + + // --- 4. 
Bind global geometry (once) --- + UINT strides[2] = { sizeof( ExVertexStruct ), sizeof( uint32_t ) }; + UINT offsets[2] = { 0, 0 }; + ID3D11Buffer* vbs[2] = { + m_StaticGlobalVertexBuffer->GetVertexBuffer().Get(), + m_GlobalInstanceIdBuffer->GetVertexBuffer().Get() + }; + context->IASetVertexBuffers( 0, 2, vbs, strides, offsets ); + context->IASetIndexBuffer( m_StaticGlobalIndexBuffer->GetVertexBuffer().Get(), VERTEX_INDEX_DXGI_FORMAT, 0 ); + context->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); + + // --- 5. Bind instance StructuredBuffer (GPU-written) to VS t1 --- + ID3D11ShaderResourceView* instSRV = m_InstanceBufferGPU->GetSRV(); + context->VSSetShaderResources( 1, 1, &instSRV ); + + // --- 6. Set shaders --- + SetActiveVertexShader( VShaderID::VS_ExInstancedObjIndirectAtlas ); + + SetupVS_ExMeshDrawCall(); + SetupVS_ExConstantBuffer(); + + // Wind constant buffer (VS still needs this for the wind animation code) + VS_ExConstantBuffer_Wind windBuff{}; + ApplyWindProps( windBuff ); + ActiveVS->GetConstantBuffer()[1]->UpdateBuffer( &windBuff ); + ActiveVS->GetConstantBuffer()[1]->BindToVertexShader( 1 ); + + // Bind reflection cube (only needed for opaque/full-shading pass) + if ( bindPS ) + context->PSSetShaderResources( 4, 1, ReflectionCube.GetAddressOf() ); + + ActiveVS->Apply(); + + // Shared PS constant buffer data (same for both shader variants) + MaterialInfo defMaterial{}; + GSky* sky = Engine::GAPI->GetSky(); + + // --- 7. Draw per atlas group using merged indirect args --- + for ( auto& group : m_AtlasDrawGroups ) { + // Bind this atlas's Texture2DArray SRV to PS slot t0 + ID3D11ShaderResourceView* srv = m_TextureAtlasses[group.format].atlasSRV; + if ( !srv ) + continue; + + // In shadow pass (bindPS=false): BC2/BC3 have alpha and need the alpha-test shader. + // BC1 is fully opaque — depth-only, no PS needed. 
+ const bool needsPS = bindPS || (group.format == DXGI_FORMAT_BC2_UNORM); + + if ( needsPS ) { + context->PSSetShaderResources( 0, 1, &srv ); + + // Full shading: select by format. Shadow alpha-test: always alpha-test PS. + if ( bindPS && group.format != DXGI_FORMAT_BC2_UNORM ) + SetActivePixelShader( PShaderID::PS_DiffuseAtlas ); + else + SetActivePixelShader( PShaderID::PS_DiffuseAtlasAlphaTest ); + + ActivePS->GetConstantBuffer()[0]->UpdateBuffer( + &Engine::GAPI->GetRendererState().GraphicsState ); + ActivePS->GetConstantBuffer()[0]->BindToPixelShader( 0 ); + + ActivePS->GetConstantBuffer()[1]->UpdateBuffer( &sky->GetAtmosphereCB() ); + ActivePS->GetConstantBuffer()[1]->BindToPixelShader( 1 ); + + ActivePS->GetConstantBuffer()[2]->UpdateBuffer( &defMaterial.buffer ); + ActivePS->GetConstantBuffer()[2]->BindToPixelShader( 2 ); + + OutdoorVobsConstantBuffer->BindToPixelShader( 3 ); + + ActivePS->Apply(); + } else { + // Depth-only opaque: unbind pixel shader + context->PSSetShader( nullptr, nullptr, 0 ); + } + + if ( DrawMultiIndexedInstancedIndirect ) { + // Vendor multi-draw-indirect: all submeshes in this group in one API call + DrawMultiIndexedInstancedIndirect( + context.Get(), + group.mergedArgsCount, + m_MergedIndirectArgs->GetIndirectBuffer().Get(), + group.mergedArgsOffset, + sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ) ); + } else { + // Fallback: one DrawIndexedInstancedIndirect per submesh + // InstanceCount is GPU-written, so zero-instance draws are no-ops on GPU + for ( UINT i = 0; i < group.mergedArgsCount; i++ ) { + context->DrawIndexedInstancedIndirect( + m_MergedIndirectArgs->GetIndirectBuffer().Get(), + group.mergedArgsOffset + i * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ) ); + } + } + } + + // Unbind instance buffer + ID3D11ShaderResourceView* nullVSSRV = nullptr; + context->VSSetShaderResources( 1, 1, &nullVSSRV ); + + return XR_SUCCESS; +} + /** Draws the static VOBs */ XRESULT D3D11GraphicsEngine::DrawFrameAlphaMeshes() { 
@@ -7784,6 +8074,462 @@ void D3D11GraphicsEngine::StorePrevViewProjMatrix() {
     }
 }
 
+/** Packs every atlas-mapped static mesh into one global vertex buffer and one
+ *  global index buffer, and records a draw range per submesh, grouped by the
+ *  DXGI_FORMAT of the atlas its texture lives in.
+ *
+ *  Reads m_TextureAtlasLookup and m_StaticVobs; writes m_StaticGlobalVertexBuffer,
+ *  m_StaticGlobalIndexBuffer, m_GlobalInstanceIdBuffer and m_AtlasDrawGroups.
+ *  Returns early, before writing any of those members, when no mesh uses an
+ *  atlas-mapped texture. */
+void D3D11GraphicsEngine::BuildStaticGeometryBuffers() {
+    std::vector allVertices;
+    std::vector allIndices;
+
+    // Temporary: group submeshes by atlas format
+    std::map groupsByFormat;
+
+    // Track which MeshInfo* we've already added (same visual used by many vobs shares geometry)
+    std::unordered_set processedMeshes;
+
+    for ( auto const& [proto, visual] : Engine::GAPI->GetStaticMeshVisuals() ) {
+        for ( auto const& [meshKey, meshList] : visual->MeshesByTexture ) {
+            // Look up atlas descriptor for this texture
+            auto it = m_TextureAtlasLookup.find( meshKey.Texture );
+            if ( it == m_TextureAtlasLookup.end() )
+                continue; // texture not in any atlas
+
+            const TextureAtlasLookup& lookup = it->second;
+            // operator[] creates the group on first use of a format
+            auto& group = groupsByFormat[lookup.atlasFormat];
+            group.format = lookup.atlasFormat;
+
+            for ( MeshInfo* mi : meshList ) {
+                // NOTE(review): a MeshInfo reachable from more than one visual is only
+                // registered for the first visual that reaches it - confirm that
+                // MeshInfo objects are never shared between visuals.
+                if ( !processedMeshes.insert( mi ).second )
+                    continue; // already in global buffer
+
+                // Offsets of this submesh inside the merged buffers (taken before appending)
+                UINT baseVertex = static_cast(allVertices.size());
+                UINT startIndex = static_cast(allIndices.size());
+
+                allVertices.insert( allVertices.end(), mi->Vertices.begin(), mi->Vertices.end() );
+                allIndices.insert( allIndices.end(), mi->Indices.begin(), mi->Indices.end() );
+
+                StaticSubmeshEntry entry;
+                entry.indexCount = static_cast(mi->Indices.size());
+                entry.startIndexLocation = startIndex;
+                entry.baseVertexLocation = static_cast(baseVertex);
+                entry.atlasDesc = lookup.descriptor;
+                entry.visual = visual;
+
+                group.submeshes.push_back( entry );
+
+                // Pre-build indirect args (InstanceCount filled per-frame)
+                D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS args = {};
+                args.IndexCountPerInstance = entry.indexCount;
+                args.InstanceCount = 0;
+                args.StartIndexLocation = entry.startIndexLocation;
+                args.BaseVertexLocation = entry.baseVertexLocation;
+                args.StartInstanceLocation = 0;
+                group.indirectArgs.push_back( args );
+            }
+        }
+    }
+
+    if ( allVertices.empty() ) {
+        LogWarn() << "BuildStaticGeometryBuffers: No vertices to process";
+        return;
+    }
+
+    // Create global vertex buffer (IMMUTABLE)
+    m_StaticGlobalVertexBuffer = std::make_unique();
+    m_StaticGlobalVertexBuffer->Init(
+        allVertices.data(),
+        static_cast(allVertices.size() * sizeof( ExVertexStruct )),
+        D3D11VertexBuffer::B_VERTEXBUFFER,
+        D3D11VertexBuffer::U_IMMUTABLE,
+        D3D11VertexBuffer::CA_NONE );
+
+    // Create global index buffer (IMMUTABLE)
+    m_StaticGlobalIndexBuffer = std::make_unique();
+    m_StaticGlobalIndexBuffer->Init(
+        allIndices.data(),
+        static_cast(allIndices.size() * sizeof( VERTEX_INDEX )),
+        D3D11VertexBuffer::B_INDEXBUFFER,
+        D3D11VertexBuffer::U_IMMUTABLE,
+        D3D11VertexBuffer::CA_NONE );
+
+    // Create instance ID buffer: {0, 1, 2, ..., N}
+    // A conservative upper bound for max instances (vobs * avg submeshes)
+    // NOTE(review): BuildGPUCullingBuffers() replaces this buffer with an exactly
+    // sized one whenever m_TotalMaxInstances > 0.
+    UINT maxInstanceIds = static_cast(m_StaticVobs.size() * 4);
+    if ( maxInstanceIds < 4096 ) maxInstanceIds = 4096;
+    std::vector instanceIds( maxInstanceIds );
+    for ( uint32_t i = 0; i < maxInstanceIds; i++ )
+        instanceIds[i] = i;
+
+    m_GlobalInstanceIdBuffer = std::make_unique();
+    m_GlobalInstanceIdBuffer->Init(
+        instanceIds.data(),
+        static_cast(instanceIds.size() * sizeof( uint32_t )),
+        D3D11VertexBuffer::B_VERTEXBUFFER,
+        D3D11VertexBuffer::U_IMMUTABLE,
+        D3D11VertexBuffer::CA_NONE );
+
+    // Move groups into final vector and create indirect buffers
+    m_AtlasDrawGroups.clear();
+    for ( auto& [fmt, group] : groupsByFormat ) {
+        if ( group.indirectArgs.empty() )
+            continue;
+
+        // NOTE(review): this per-group buffer is created with B_VERTEXBUFFER, while the
+        // merged args buffer in BuildGPUCullingBuffers() uses B_UNORDERED_ACCESS - confirm intended.
+        UINT bufSize = static_cast(group.indirectArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ));
+        group.indirectBuffer = std::make_unique();
+        group.indirectBuffer->Init(
+            group.indirectArgs.data(), bufSize,
+            D3D11IndirectBuffer::B_VERTEXBUFFER,
+            D3D11IndirectBuffer::U_DYNAMIC,
+            D3D11IndirectBuffer::CA_WRITE );
+
+        m_AtlasDrawGroups.push_back( std::move( group ) );
+    }
+
+    LogInfo() << "Atlas geometry: " << allVertices.size() << " vertices, "
+        <<
allIndices.size() << " indices, "
+        << m_AtlasDrawGroups.size() << " atlas groups, "
+        << processedMeshes.size() << " unique submeshes";
+}
+
+/** Builds the CPU-side tables for compute-shader vob culling and uploads them.
+ *
+ *  Produces, in order: one merged indirect-args array covering all atlas groups,
+ *  a SubmeshGPUData array that is contiguous per visual, one VobGPUData entry per
+ *  static vob, and the GPU buffers holding them. Each submesh reserves as many
+ *  instance slots as there are vobs using its visual; the sum is stored in
+ *  m_TotalMaxInstances. Does nothing when m_AtlasDrawGroups or m_StaticVobs is empty. */
+void D3D11GraphicsEngine::BuildGPUCullingBuffers() {
+    if ( m_AtlasDrawGroups.empty() || m_StaticVobs.empty() )
+        return;
+
+    // --- 1. Build visual -> vob count mapping ---
+    std::unordered_map vobsPerVisual;
+    // NOTE(review): vobIndicesByVisual is filled below but never read in this function.
+    std::unordered_map> vobIndicesByVisual;
+
+    for ( size_t i = 0; i < m_StaticVobs.size(); i++ ) {
+        auto* visual = reinterpret_cast(m_StaticVobs[i]->VisualInfo);
+        vobsPerVisual[visual]++;
+        vobIndicesByVisual[visual].push_back( i );
+    }
+
+    // --- 2. Build merged indirect args + SubmeshGPUData ---
+    // Pass 1: Build merged indirect args (flat, in per-group order) and collect
+    // per-visual submesh entries. We must ensure SubmeshGPUData entries for the
+    // same visual are contiguous in the final array, but they may come from
+    // different atlas groups (e.g., BC1 + BC2 textures on the same mesh).
+    std::vector mergedArgs;
+    std::unordered_map> visualSubmeshMap;
+
+    UINT runningInstanceOffset = 0;
+    UINT globalArgIndex = 0;
+
+    for ( auto& group : m_AtlasDrawGroups ) {
+        // Byte offset / count of this group's slice inside the merged args buffer
+        group.mergedArgsOffset = static_cast(mergedArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ));
+        group.mergedArgsCount = static_cast(group.indirectArgs.size());
+
+        for ( size_t si = 0; si < group.submeshes.size(); si++ ) {
+            const auto& submesh = group.submeshes[si];
+            MeshVisualInfo* visual = submesh.visual;
+            // count() guard avoids inserting a zero entry for visuals no vob uses
+            UINT maxInstances = vobsPerVisual.count( visual ) ? vobsPerVisual[visual] : 0;
+
+            // Build the indirect arg with static fields; InstanceCount will be set by CS
+            D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS args = {};
+            args.IndexCountPerInstance = submesh.indexCount;
+            args.InstanceCount = 0;
+            args.StartIndexLocation = submesh.startIndexLocation;
+            args.BaseVertexLocation = submesh.baseVertexLocation;
+            args.StartInstanceLocation = runningInstanceOffset;
+            mergedArgs.push_back( args );
+
+            // Collect per-visual submesh GPU data (written contiguously in pass 2)
+            SubmeshGPUData smGPU = {};
+            smGPU.slice = submesh.atlasDesc.slice;
+            smGPU.uStart = submesh.atlasDesc.uStart;
+            smGPU.vStart = submesh.atlasDesc.vStart;
+            smGPU.uEnd = submesh.atlasDesc.uEnd;
+            smGPU.vEnd = submesh.atlasDesc.vEnd;
+            smGPU.argIndex = globalArgIndex;
+            smGPU.instanceBaseOffset = runningInstanceOffset;
+            visualSubmeshMap[visual].push_back( smGPU );
+
+            // Reserve one instance slot per vob that uses this visual
+            runningInstanceOffset += maxInstances;
+            globalArgIndex++;
+        }
+    }
+
+    m_TotalMaxInstances = runningInstanceOffset;
+
+    // Pass 2: Flatten per-visual submesh entries into a contiguous SubmeshGPUData array.
+    // This guarantees VobGPUData.submeshStart/submeshCount indexes a contiguous range.
+    // The visuals' relative order follows unordered_map iteration and is unspecified;
+    // only the start/count pairs recorded below tie a vob to its entries.
+    struct VisualSubmeshRange {
+        UINT start;
+        UINT count;
+    };
+    std::unordered_map visualSubmeshRanges;
+    std::vector submeshGPU;
+
+    for ( auto& [visual, entries] : visualSubmeshMap ) {
+        UINT start = static_cast(submeshGPU.size());
+        for ( auto& entry : entries )
+            submeshGPU.push_back( entry );
+        visualSubmeshRanges[visual] = { start, static_cast(entries.size()) };
+    }
+
+    // --- 3. Build VobGPUData ---
+    std::vector vobGPU;
+    vobGPU.reserve( m_StaticVobs.size() );
+
+    for ( size_t i = 0; i < m_StaticVobs.size(); i++ ) {
+        VobInfo* v = m_StaticVobs[i];
+        auto* visual = reinterpret_cast(v->VisualInfo);
+
+        VobGPUData data = {};
+
+        // AABB from vob's bounding box
+        DirectX::BoundingBox bb = Frustum::BBoxFromzTBBox3D( v->Vob->GetBBox() );
+        data.aabbCenter = bb.Center;
+        data.aabbExtent = bb.Extents;
+
+        data.world = v->WorldMatrix;
+        data.prevWorld = v->WorldMatrix; // for static vobs, prev == current
+        data.color = v->GroundColor;
+
+        // Bake animation properties
+        zTAnimationMode aniMode = v->Vob->GetVisualAniMode();
+        if ( aniMode != zVISUAL_ANIMODE_NONE ) {
+            data.aniModeStrength = v->Vob->GetVisualAniModeStrength();
+            data.canBeAffectedByPlayer = (!v->Vob->GetDynColl() ? 1.0f : 0.0f);
+        } else {
+            data.aniModeStrength = 0.0f;
+            data.canBeAffectedByPlayer = 0.0f;
+        }
+
+        // Look up submesh range for this visual
+        // (vobs whose visual has no atlas submesh keep the zero-initialised start/count)
+        auto it = visualSubmeshRanges.find( visual );
+        if ( it != visualSubmeshRanges.end() ) {
+            data.submeshStart = it->second.start;
+            data.submeshCount = it->second.count;
+        }
+
+        vobGPU.push_back( data );
+    }
+
+    // --- 4. Upload to GPU ---
+    // NOTE(review): none of the Init()/CreateBuffer() results below are checked.
+    auto* device = GetDevice().Get();
+    auto* context = GetContext().Get();
+
+    // VobGPUData buffer (SRV only, DEFAULT usage)
+    m_VobGPUBuffer = std::make_unique>();
+    m_VobGPUBuffer->Init( device, static_cast(vobGPU.size()), false, false );
+    m_VobGPUBuffer->UpdateBufferDefault( context, vobGPU.data(), static_cast(vobGPU.size()) );
+
+    // SubmeshGPUData buffer (SRV only, DEFAULT usage)
+    m_SubmeshGPUBuffer = std::make_unique>();
+    m_SubmeshGPUBuffer->Init( device, static_cast(submeshGPU.size()), false, false );
+    m_SubmeshGPUBuffer->UpdateBufferDefault( context, submeshGPU.data(), static_cast(submeshGPU.size()) );
+
+    // Instance buffer (UAV for CS writes, SRV for VS reads)
+    // Capacity is clamped to at least one element so the buffer size is never zero
+    UINT instanceCapacity = std::max( m_TotalMaxInstances, 1u );
+    m_InstanceBufferGPU = std::make_unique>();
+    m_InstanceBufferGPU->Init( device, instanceCapacity, false, true );
+
+    // Merged indirect args buffer (UAV for CS atomics + indirect draw)
+    UINT argsSize = static_cast(mergedArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ));
+    m_MergedIndirectArgs = std::make_unique();
+    m_MergedIndirectArgs->Init(
+        mergedArgs.data(), argsSize,
+        D3D11IndirectBuffer::B_UNORDERED_ACCESS,
+        D3D11IndirectBuffer::U_DEFAULT,
+        D3D11IndirectBuffer::CA_NONE );
+
+    // Template buffer for per-frame reset (stores args with InstanceCount=0)
+    m_MergedArgsReset = mergedArgs; // already has InstanceCount=0
+
+    D3D11_BUFFER_DESC templateDesc = {};
+    templateDesc.ByteWidth = argsSize;
+    templateDesc.Usage = D3D11_USAGE_DEFAULT;
+    templateDesc.BindFlags = 0;
+    templateDesc.MiscFlags = D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS;
+
+    D3D11_SUBRESOURCE_DATA templateData = {};
+    templateData.pSysMem = mergedArgs.data();
+    device->CreateBuffer( &templateDesc, &templateData, m_IndirectArgsTemplate.ReleaseAndGetAddressOf() );
+
+    // Cull constant buffer
+    CullConstants initCB = {};
+    m_CullConstantBuffer = std::make_unique( sizeof( CullConstants ), &initCB );
+
+    // Update the instance ID
buffer to match the new total capacity
+    if ( m_TotalMaxInstances > 0 ) {
+        std::vector instanceIds( m_TotalMaxInstances );
+        for ( uint32_t i = 0; i < m_TotalMaxInstances; i++ )
+            instanceIds[i] = i;
+
+        m_GlobalInstanceIdBuffer = std::make_unique();
+        m_GlobalInstanceIdBuffer->Init(
+            instanceIds.data(),
+            static_cast(instanceIds.size() * sizeof( uint32_t )),
+            D3D11VertexBuffer::B_VERTEXBUFFER,
+            D3D11VertexBuffer::U_IMMUTABLE,
+            D3D11VertexBuffer::CA_NONE );
+    }
+
+    LogInfo() << "GPU culling: " << vobGPU.size() << " vobs, "
+        << submeshGPU.size() << " submesh entries, "
+        << mergedArgs.size() << " indirect args, "
+        << m_TotalMaxInstances << " max instances";
+}
+
+
+/** Rebuilds all texture atlases for the static vobs cached in m_StaticVobs.
+ *
+ *  Destroys the existing atlases and lookup tables, then (only when
+ *  SupportTextureAtlases is set) collects every distinct, cached-in texture used
+ *  by those vobs, builds one atlas per DXGI_FORMAT, records each texture's atlas
+ *  descriptor in m_TextureAtlasLookup and finally rebuilds the global geometry
+ *  and GPU culling buffers. */
+void D3D11GraphicsEngine::BuildSceneTextureAtlasses() {
+    for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ ) {
+        m_TextureAtlasses[(DXGI_FORMAT)i].Destroy();
+    }
+    m_TextureAtlasLookup.clear();
+    m_AtlasDrawGroups.clear();
+
+    if ( !SupportTextureAtlases ) {
+        return;
+    }
+
+    // One candidate texture: the engine-side handle, its format, and a reference
+    // to the D3D texture object
+    struct TextureInfo {
+        zCTexture* gothicTexture;
+        DXGI_FORMAT Format;
+        Microsoft::WRL::ComPtr Texture2D;
+    };
+    std::unordered_set seenTextures;
+    std::vector uniqueTextures;
+
+    for ( auto vobInfo : m_StaticVobs ) {
+        for ( auto& byTex : reinterpret_cast(vobInfo->VisualInfo)->MeshesByTexture ) {
+            zCTexture* tex = byTex.first.Texture;
+            // The texture is marked as seen before the checks below, so one that
+            // fails them is not retried when another vob references it.
+            if ( !tex || !seenTextures.insert( tex ).second )
+                continue; // skip nulls and duplicates
+
+            auto cachedState = tex->CacheIn( -1 );
+            if ( cachedState != zRES_CACHED_IN )
+                continue;
+
+            auto surface = tex->GetSurface();
+            if ( !surface || !surface->IsSurfaceReady() )
+                continue;
+
+            auto engineTex = surface->GetEngineTexture();
+            if ( !engineTex )
+                continue;
+
+            // NOTE(review): GetTextureObject() is dereferenced without a null check -
+            // confirm it cannot be null once IsSurfaceReady() returned true.
+            D3D11_TEXTURE2D_DESC desc;
+            engineTex->GetTextureObject()->GetDesc( &desc );
+            // The format doubles as the index into m_TextureAtlasses, so it must be in range
+            if ( desc.Format < 1 || desc.Format >= TEXTURE_ATLAS_MAX ) {
+                LogError() << "Texture " << tex->GetName() << " has unsupported format for atlas: " << desc.Format;
+                continue;
+            }
+            uniqueTextures.push_back( { tex, desc.Format, engineTex->GetTextureObject() } );
+        }
+    }
+
+    // Sort by format so textures with the same format are contiguous
+    std::sort( uniqueTextures.begin(), uniqueTextures.end(), []( const TextureInfo& a, const TextureInfo& b ) {
+        return a.Format < b.Format;
+    } );
+
+    // Create atlases per format group (process ALL groups including last)
+    size_t rangeStart = 0;
+    while ( rangeStart < uniqueTextures.size() ) {
+        // [rangeStart, rangeEnd) is the run of textures sharing the format fmt
+        DXGI_FORMAT fmt = uniqueTextures[rangeStart].Format;
+        size_t rangeEnd = rangeStart;
+        while ( rangeEnd < uniqueTextures.size() && uniqueTextures[rangeEnd].Format == fmt )
+            rangeEnd++;
+
+        std::vector texPtrs;
+        texPtrs.reserve( rangeEnd - rangeStart );
+        for ( size_t i = rangeStart; i < rangeEnd; i++ )
+            texPtrs.push_back( uniqueTextures[i].Texture2D.Get() );
+
+        // Non-owning view over texPtrs; texPtrs must stay alive during the call
+        std::basic_string_view txView( texPtrs.data(), texPtrs.size() );
+        auto atlas = TextureManager::CreateAtlasArray( GetDevice().Get(), GetContext().Get(), txView, 2048, 6 );
+
+        // Map descriptors back to Gothic texture pointers
+        // NOTE(review): assumes atlas.descriptors holds one entry per input texture,
+        // in input order - confirm against CreateAtlasArray, including its failure paths.
+        for ( size_t i = 0; i < texPtrs.size(); i++ ) {
+            size_t srcIdx = rangeStart + i;
+            m_TextureAtlasLookup[uniqueTextures[srcIdx].gothicTexture] = {
+                fmt, atlas.descriptors[i]
+            };
+        }
+
+        m_TextureAtlasses[fmt] = atlas;
+        rangeStart = rangeEnd;
+    }
+
+    LogInfo() << "Atlas: " << uniqueTextures.size() << " unique textures, " << m_TextureAtlasLookup.size() << " mapped";
+
+    // Build global VB/IB and indirect args from atlas data
+    BuildStaticGeometryBuffers();
+
+    // Build GPU structured buffers for compute shader culling
+    // currently only used with static vobs when we do atlases.
+    BuildGPUCullingBuffers();
+}
+
+/** Collects the world's vobs once into m_StaticVobs and rebuilds m_StaticVobsAABBs,
+ *  a structure-of-arrays copy of their bounding boxes in batches of eight.
+ *
+ *  The collection pass uses an always-containing frustum, a camera at the origin
+ *  and the COLLECT_DISABLE_CHECK_DIST flag; indoor-vob and visual-FX draw
+ *  distances are set to 0. */
+void D3D11GraphicsEngine::CacheWorldStaticVobs() {
+
+    // Sinks for the two other queues the proxy requires; their contents are not used here.
+    // NOTE(review): these statics are never cleared in this function - confirm the
+    // proxy does not keep appending to them on every world load.
+    static std::vector _1;
+    static std::vector _2;
+    m_StaticVobs.clear();
+    m_StaticVobs.reserve( 1024 );
+
+    LegacyRenderQueueProxy q( m_StaticVobs, _1, _2 );
+    RndCullContext ctx;
+    ctx.queue = &q;
+    ctx.cameraPosition = XMFLOAT3( 0, 0, 0 );
+    ctx.stage = RenderStage::STAGE_DRAW_WORLD;
+    ctx.frustum = Frustum::AlwaysContainingFrustum();
+    ctx.drawDistances.OutdoorVobs = 1'000'000;
+    ctx.drawDistances.OutdoorVobsSmall = ctx.drawDistances.OutdoorVobs;
+    ctx.drawDistances.IndoorVobs = 0;
+    ctx.drawDistances.VisualFX = 0;
+    Engine::GAPI->CollectVisibleVobs( ctx, (EBspTreeCollectFlags)(EBspTreeCollectFlags::COLLECT_VOBS | EBspTreeCollectFlags::COLLECT_DISABLE_CHECK_DIST) );
+
+    const size_t totalItems = m_StaticVobs.size();
+    // Correct math to calculate exact number of batches (rounds up to nearest multiple of 8, AVX ;) )
+    const size_t numBatches = (totalItems + 7) / 8;
+    m_StaticVobsAABBs.clear();
+    m_StaticVobsAABBs.reserve( numBatches );
+
+    for ( size_t i = 0; i < numBatches; ++i ) {
+        AABB_SoA_Batch8 b = {}; // Zero-initialize the batch
+
+        // Fill the 8 slots in this batch
+        for ( int j = 0; j < 8; ++j ) {
+            // Slot j of batch i corresponds to m_StaticVobs[i * 8 + j]
+            size_t vobIdx = (i * 8) + j;
+
+            if ( vobIdx < totalItems ) {
+                // Valid item: Extract and store
+                DirectX::BoundingBox bb = Frustum::Frustum::BBoxFromzTBBox3D( m_StaticVobs[vobIdx]->Vob->GetBBox() );
+
+                b.cx[j] = bb.Center.x;
+                b.cy[j] = bb.Center.y;
+                b.cz[j] = bb.Center.z;
+
+                b.ex[j] = bb.Extents.x;
+                b.ey[j] = bb.Extents.y;
+                b.ez[j] = bb.Extents.z;
+            } else {
+                // Out of bounds (tail padding):
+                // Insert a dummy AABB far outside the map so it is guaranteed to be culled.
+                // This prevents invalid indices from entering your RenderQueue!
+                // NOTE(review): "guaranteed" relies on no frustum ever containing
+                // (1e6, 1e6, 1e6) - consumers should still bound-check the index.
+                b.cx[j] = 1000000.0f;
+                b.cy[j] = 1000000.0f;
+                b.cz[j] = 1000000.0f;
+
+                b.ex[j] = 0.0f;
+                b.ey[j] = 0.0f;
+                b.ez[j] = 0.0f;
+            }
+        }
+
+        m_StaticVobsAABBs.push_back( b );
+    }
+}
+
+/** World-load hook: refreshes the static vob cache first, because the atlas
+ *  build that follows iterates m_StaticVobs. */
+void D3D11GraphicsEngine::OnWorldLoaded()
+{
+    CacheWorldStaticVobs();
+
+    // --- Atlas building: collect unique textures, create Texture2DArray atlases, map descriptors ---
+    BuildSceneTextureAtlasses();
+}
+
 void D3D11GraphicsEngine::StoreVobPreviousTransforms() {
     if ( !zCCamera::GetCamera() ) {
         return; // only do this if we actually are in-game
diff --git a/D3D11Engine/D3D11GraphicsEngine.h b/D3D11Engine/D3D11GraphicsEngine.h
index 33405f66..d64ad629 100644
--- a/D3D11Engine/D3D11GraphicsEngine.h
+++ b/D3D11Engine/D3D11GraphicsEngine.h
@@ -4,6 +4,9 @@
 #include "GothicAPI.h"
 #include "D3D11ShadowMap.h"
 #include "D3D11ShaderManager.h"
+#include "D3D11TextureAtlasManager.h"
+#include "D3D11StructuredBuffer.h"
+#include "D3D11IndirectBuffer.h"
 
 struct RenderToDepthStencilBuffer;
 
@@ -30,6 +33,7 @@ const unsigned int MORPHEDMESH_HIGH_BUFFER_SIZE = 20480 * sizeof( ExVertexStruct
 const unsigned int HUD_BUFFER_SIZE = 6 * sizeof( ExVertexStruct );
 const int NUM_MAX_BONES = 96;
 const int unsigned INSTANCING_BUFFER_SIZE = sizeof( VobInstanceInfo ) * 2048;
+// Number of atlas slots: one per DXGI_FORMAT value up to and including DXGI_FORMAT_V408
+constexpr size_t TEXTURE_ATLAS_MAX = DXGI_FORMAT_V408 + 1;
 
 class D3D11PointLight;
 
@@ -54,6 +58,25 @@ struct AlphaMeshData {
     std::vector instances;
 };
 
+// Tracks one unique submesh in the global geometry buffer
+struct StaticSubmeshEntry {
+    UINT indexCount;
+    UINT startIndexLocation; // offset into global IB
+    int baseVertexLocation; // offset into global VB
+    TextureDescriptor atlasDesc;
+    MeshVisualInfo* visual; // which visual owns this submesh
+};
+
+// Groups all submeshes that share one atlas (same DXGI_FORMAT)
+struct AtlasDrawGroup {
+    DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN;
+    std::vector submeshes;
+    std::vector indirectArgs;
+    std::unique_ptr indirectBuffer;
+    UINT mergedArgsOffset = 0; // byte offset into merged indirect args
buffer + UINT mergedArgsCount = 0; // number of args in this group +}; + class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { public: D3D11GraphicsEngine(); @@ -246,6 +269,15 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { XRESULT DrawVOBsInstanced(); XRESULT DrawFrameAlphaMeshes(); + /** Draws static vobs using atlas indirect path */ + XRESULT DrawVOBsIndirect( const Frustum& frustum, bool bindPS = true ); + + /** Builds global VB/IB and indirect args from atlas data (called from OnWorldLoaded) */ + void BuildStaticGeometryBuffers(); + + /** Builds GPU data for compute shader culling (called after BuildStaticGeometryBuffers) */ + void BuildGPUCullingBuffers(); + /** Set wind props in const buffer */ void ApplyWindProps( VS_ExConstantBuffer_Wind& windBuff ); @@ -348,17 +380,23 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { RenderToTextureBuffer* GetVelocityBuffer() const { return VelocityBuffer.get(); } const XMFLOAT4X4& GetPrevViewProjMatrix() const { return m_PrevViewProjMatrix; } - void StorePrevViewProjMatrix(); auto GetClampSamplerState() -> auto { return ClampSamplerState.Get(); } auto GetCubeSamplerState() -> auto { return CubeSamplerState.Get(); } auto GetLinearSamplerState() -> auto { return LinearSamplerState.Get(); } D3D11ShadowMap* GetShadowMaps() const { return ShadowMaps.get(); } + void OnWorldLoaded() override; protected: void StoreVobPreviousTransforms(); + void StorePrevViewProjMatrix(); + + void BuildSceneTextureAtlasses(); + + void CacheWorldStaticVobs(); + std::unique_ptr m_FrameLimiter; int m_LastFrameLimit; @@ -473,4 +511,26 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { INT2 NewResolution; void CreateAndBindDefaultSampler(); + + std::vector m_StaticVobs{}; + std::vector m_StaticVobsAABBs{}; + std::array m_TextureAtlasses{}; + + /** Atlas indirect draw path */ + std::unordered_map m_TextureAtlasLookup; + std::unique_ptr m_StaticGlobalVertexBuffer; + std::unique_ptr 
m_StaticGlobalIndexBuffer; + std::unique_ptr m_GlobalInstanceIdBuffer; + std::vector m_AtlasDrawGroups; + std::unique_ptr> m_StaticVobInstanceBuffer; + + /** GPU culling buffers (created once at world load) */ + std::unique_ptr> m_VobGPUBuffer; + std::unique_ptr> m_SubmeshGPUBuffer; + std::unique_ptr> m_InstanceBufferGPU; + std::unique_ptr m_MergedIndirectArgs; + Microsoft::WRL::ComPtr m_IndirectArgsTemplate; + std::unique_ptr m_CullConstantBuffer; + std::vector m_MergedArgsReset; // CPU-side template for reset + UINT m_TotalMaxInstances = 0; }; diff --git a/D3D11Engine/D3D11ShaderManager.cpp b/D3D11Engine/D3D11ShaderManager.cpp index 483f3ead..f15a36fc 100644 --- a/D3D11Engine/D3D11ShaderManager.cpp +++ b/D3D11Engine/D3D11ShaderManager.cpp @@ -245,6 +245,10 @@ XRESULT D3D11ShaderManager::Init() { Shaders.push_back( ShaderInfo::make( "VS_ExInstancedObj.hlsl" ) .with_layout( 10 ) ); + Shaders.push_back( ShaderInfo::make("VS_ExInstancedObjIndirectAtlas.hlsl", 12 ) + .with_layout( 12 ) + .with_cbuffer( sizeof( VS_ExConstantBuffer_PerFrame ) ) + .with_cbuffer( sizeof( VS_ExConstantBuffer_Wind ) ) ); Shaders.push_back( ShaderInfo::make( "VS_ExInstanced.hlsl" ) .with_layout( 4 ) ); @@ -360,6 +364,28 @@ XRESULT D3D11ShaderManager::Init() { Shaders.push_back( ShaderInfo::make( "PS_PortalDiffuse.hlsl" ) ); //forest portals, doors, etc. 
Shaders.push_back( ShaderInfo::make( "PS_WaterfallFoam.hlsl" ) ); //foam on at the base of waterfalls + + Shaders.push_back( ShaderInfo::make( "PS_DiffuseAtlas.hlsl") + .with_macros( makros ) + .with_cbuffer( sizeof( GothicGraphicsState ) ) + .with_cbuffer( sizeof( AtmosphereConstantBuffer ) ) + .with_cbuffer( sizeof( MaterialInfo::Buffer ) ) + .with_cbuffer( sizeof( float4 ) ) ); // DIST_Distance + + makros.clear(); + m.Name = "NORMALMAPPING"; + m.Definition = "0"; + makros.push_back( m ); + m.Name = "ALPHATEST"; + m.Definition = "1"; + makros.push_back( m ); + + Shaders.push_back( ShaderInfo::make( "PS_DiffuseAtlas.hlsl" ) + .with_macros( makros ) + .with_cbuffer( sizeof( GothicGraphicsState ) ) + .with_cbuffer( sizeof( AtmosphereConstantBuffer ) ) + .with_cbuffer( sizeof( MaterialInfo::Buffer ) ) + .with_cbuffer( sizeof( float4 ) ) ); // DIST_Distance makros.clear(); @@ -542,7 +568,9 @@ XRESULT D3D11ShaderManager::Init() { Shaders.push_back( ShaderInfo::make( "CS_LightCulling.hlsl" )); Shaders.push_back( ShaderInfo::make( "CS_TiledShading.hlsl" )); - } + + + Shaders.push_back( ShaderInfo::make( "CS_CullVobs.hlsl" ));} return XR_SUCCESS; } diff --git a/D3D11Engine/D3D11ShadowMap.cpp b/D3D11Engine/D3D11ShadowMap.cpp index eb723a52..0e1284a3 100644 --- a/D3D11Engine/D3D11ShadowMap.cpp +++ b/D3D11Engine/D3D11ShadowMap.cpp @@ -338,10 +338,6 @@ void D3D11ShadowMap::Init( Microsoft::WRL::ComPtr& device, Micros EnsureShadowMapBackend( s ); - for ( int i = 0; i < MAX_CSM_CASCADES; ++i ) { - m_RenderQueues[i] = std::make_unique( device.Get(), context.Get() ); - } - D3D11GraphicsEngineBase* engine = reinterpret_cast( Engine::GraphicsEngine ); // Create constantbuffer for the view-matrices @@ -648,81 +644,6 @@ XRESULT D3D11ShadowMap::PrepareRender() } } - // Collect all VOBs inside our shadow draw distance (last frustum) - - static std::vector potentialCasters; - static std::vector _1; - static std::vector _2; - potentialCasters.reserve(1024); - potentialCasters.clear(); - 
- { - RndCullContext ctx; - LegacyRenderQueueProxy q(potentialCasters, _1, _2); - - ctx.queue = &q; - ctx.frustum = Frustum::AlwaysContainingFrustum(); - ctx.cameraPosition = m_WorldShadowPos; - ctx.stage = RenderStage::STAGE_DRAW_SHADOWS; - ctx.drawDistances.OutdoorVobs = 20000; - ctx.drawDistances.OutdoorVobsSmall = 20000; - - Engine::GAPI->CollectVisibleVobs( ctx ); - } - - auto invView = XMMatrixTranspose(XMLoadFloat4x4(&zCCamera::GetCamera()->GetTransformDX( zCCamera::ETransformType::TT_VIEW_INV ))); - auto camPos = invView.r[3]; - XMVECTOR camForward = XMVector3Normalize( invView.r[2]); - - for ( int i = 0; i < numCascades; ++i ) { - m_RenderQueues[i]->Reset(); - } - - if ( numCascades > 3 ) { - for ( auto vob : potentialCasters ) { - - auto boundingSphere = Frustum::BSphereFromzTBBox3D( vob->Vob->GetBBox() ); - if ( m_CascadeCRs[0].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[0]->GetVobs().push_back( vob ); - - if ( /*m_ShouldUpdateCascade[1] && */m_CascadeCRs[1].frustum.Intersects(boundingSphere) ) - m_RenderQueues[1]->GetVobs().push_back( vob ); - - if ( m_ShouldUpdateCascade[2] && m_CascadeCRs[2].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[2]->GetVobs().push_back( vob ); - - if ( m_ShouldUpdateCascade[3] && m_CascadeCRs[3].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[3]->GetVobs().push_back( vob ); - } - } else if ( numCascades > 2 ) { - for ( auto vob : potentialCasters ) { - auto boundingSphere = Frustum::BSphereFromzTBBox3D( vob->Vob->GetBBox() ); - if ( m_CascadeCRs[0].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[0]->GetVobs().push_back( vob ); - - if ( /*m_ShouldUpdateCascade[1] && */m_CascadeCRs[1].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[1]->GetVobs().push_back( vob ); - - if ( m_ShouldUpdateCascade[2] && m_CascadeCRs[2].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[2]->GetVobs().push_back( vob ); - } - } else if ( numCascades > 1 ) { - for ( auto vob : potentialCasters ) { - 
auto boundingSphere = Frustum::BSphereFromzTBBox3D( vob->Vob->GetBBox() ); - if ( m_CascadeCRs[0].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[0]->GetVobs().push_back( vob ); - - if ( /*m_ShouldUpdateCascade[1] && */m_CascadeCRs[1].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[1]->GetVobs().push_back( vob ); - } - } else if ( numCascades > 0 ) { - for ( auto vob : potentialCasters ) { - auto boundingSphere = Frustum::BSphereFromzTBBox3D( vob->Vob->GetBBox() ); - if ( m_CascadeCRs[0].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[0]->GetVobs().push_back( vob ); - } - } - return XR_SUCCESS; } @@ -980,7 +901,6 @@ XRESULT D3D11ShadowMap::DrawWorldShadow( ) RenderShadowmaps( renderParams ); Engine::GAPI->SetCameraReplacementPtr( nullptr ); - m_RenderQueues[cascadeIdx]->Reset(); } } diff --git a/D3D11Engine/D3D11ShadowMap.h b/D3D11Engine/D3D11ShadowMap.h index e179f779..bc3f8e48 100644 --- a/D3D11Engine/D3D11ShadowMap.h +++ b/D3D11Engine/D3D11ShadowMap.h @@ -168,7 +168,6 @@ class D3D11ShadowMap { /* 4 */ { 0.80f, 1.0f }, // Players should really want to use 4 cascades for best quality and furthest }; - D3D11RenderQueue* GetRenderQueue( int cascadeIndex ) { return m_RenderQueues[cascadeIndex].get(); } private: bool ShouldUseAtlas() const; void RecreateShadowSampler(); @@ -190,7 +189,6 @@ class D3D11ShadowMap { Microsoft::WRL::ComPtr m_shadowmapSampler; int m_lastNumCascades = 0; std::array m_CascadeCRs; - std::array, MAX_CSM_CASCADES> m_RenderQueues; std::vector m_CascadeSplits; std::array m_ShouldUpdateCascade; XMFLOAT3 m_WorldShadowPos; diff --git a/D3D11Engine/D3D11StructuredBuffer.h b/D3D11Engine/D3D11StructuredBuffer.h new file mode 100644 index 00000000..1d2a61c5 --- /dev/null +++ b/D3D11Engine/D3D11StructuredBuffer.h @@ -0,0 +1,134 @@ +#pragma once + +#include "pch.h" +#include + +// Templated structured buffer for GPU compute/shader access +template +class D3D11StructuredBuffer { +public: + D3D11StructuredBuffer() : ElementCount( 0 ), 
MaxElementCount( 0 ) {}
+
+    ~D3D11StructuredBuffer() = default;
+
+    /** Creates the structured buffer and its views for up to maxElements elements.
+     *
+     *  cpuWrite  -> DYNAMIC buffer, SRV only, filled through UpdateBuffer().
+     *  gpuWrite  -> DEFAULT buffer with SRV and UAV (written by shaders).
+     *  neither   -> DEFAULT buffer, SRV only, filled through UpdateBufferDefault().
+     *
+     *  Returns E_INVALIDARG for a null device, a zero capacity, or cpuWrite and
+     *  gpuWrite both set; otherwise the HRESULT of the failing D3D11 call, or S_OK.
+     *  On failure the object keeps the state it had before the call. */
+    HRESULT Init( ID3D11Device* device, UINT maxElements, bool cpuWrite = true, bool gpuWrite = false ) {
+        // cpuWrite wins the usage selection below and yields a buffer without the
+        // UAV bind flag, so the UAV requested by gpuWrite could never be created.
+        if ( !device || maxElements == 0 || (cpuWrite && gpuWrite) )
+            return E_INVALIDARG;
+
+        D3D11_BUFFER_DESC desc = {};
+        desc.ByteWidth = static_cast<UINT>(sizeof( T ) * maxElements);
+        desc.StructureByteStride = sizeof( T );
+        desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
+        desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
+
+        if ( cpuWrite ) {
+            desc.Usage = D3D11_USAGE_DYNAMIC;
+            desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+        } else {
+            desc.Usage = D3D11_USAGE_DEFAULT;
+            desc.CPUAccessFlags = 0;
+            if ( gpuWrite )
+                desc.BindFlags |= D3D11_BIND_UNORDERED_ACCESS;
+        }
+
+        // Create into locals and commit only after everything succeeded. Writing
+        // through the members' GetAddressOf() would overwrite - and leak - the
+        // resources of an earlier Init() call.
+        Microsoft::WRL::ComPtr<ID3D11Buffer> newBuffer;
+        HRESULT hr = device->CreateBuffer( &desc, nullptr, newBuffer.GetAddressOf() );
+        if ( FAILED( hr ) ) return hr;
+
+        // Create SRV
+        D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
+        srvDesc.Format = DXGI_FORMAT_UNKNOWN;
+        srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
+        srvDesc.Buffer.FirstElement = 0;
+        srvDesc.Buffer.NumElements = maxElements;
+
+        Microsoft::WRL::ComPtr<ID3D11ShaderResourceView> newSRV;
+        hr = device->CreateShaderResourceView( newBuffer.Get(), &srvDesc, newSRV.GetAddressOf() );
+        if ( FAILED( hr ) ) return hr;
+
+        // Create UAV if GPU writable
+        Microsoft::WRL::ComPtr<ID3D11UnorderedAccessView> newUAV;
+        if ( gpuWrite ) {
+            D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
+            uavDesc.Format = DXGI_FORMAT_UNKNOWN;
+            uavDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
+            uavDesc.Buffer.FirstElement = 0;
+            uavDesc.Buffer.NumElements = maxElements;
+
+            hr = device->CreateUnorderedAccessView( newBuffer.Get(), &uavDesc, newUAV.GetAddressOf() );
+            if ( FAILED( hr ) ) return hr;
+        }
+
+        // Commit; the previous resources are released when the locals go out of scope
+        Buffer.Swap( newBuffer );
+        SRV.Swap( newSRV );
+        UAV.Swap( newUAV ); // stays null without gpuWrite, which also drops a stale UAV
+        MaxElementCount = maxElements;
+        ElementCount = 0;
+
+        return S_OK;
+    }
+
+    /** Replaces the contents of a DYNAMIC (cpuWrite) buffer with count elements
+     *  from data, using a WRITE_DISCARD map. A count above the capacity is logged
+     *  and clamped. Returns E_FAIL when the buffer has not been created. */
+    HRESULT UpdateBuffer( ID3D11DeviceContext* context, const T* data, UINT count ) {
+        if ( !context || !Buffer )
+            return E_FAIL; // Init() has not succeeded yet
+
+        if ( count > MaxElementCount ) {
+            LogError() << "StructuredBuffer overflow: " << count << " > " << MaxElementCount;
+            count = MaxElementCount;
+        }
+
+        ElementCount = count;
+
+        D3D11_MAPPED_SUBRESOURCE mapped;
+        HRESULT hr = context->Map( Buffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+        if ( FAILED( hr ) ) return hr;
+
+        // memcpy must not be handed a null source, even for zero bytes
+        if ( count > 0 && data )
+            memcpy( mapped.pData, data, sizeof( T ) * count );
+        context->Unmap( Buffer.Get(), 0 );
+
+        return S_OK;
+    }
+
+    /** Uploads count elements from data into a DEFAULT-usage buffer. Only the first
+     *  count elements are written; a count above the capacity is logged and clamped. */
+    void UpdateBufferDefault( ID3D11DeviceContext* context, const T* data, UINT count ) {
+        if ( !context || !Buffer )
+            return; // Init() has not succeeded yet
+
+        if ( count > MaxElementCount ) {
+            LogError() << "StructuredBuffer overflow: " << count << " > " << MaxElementCount;
+            count = MaxElementCount;
+        }
+        ElementCount = count;
+
+        if ( count == 0 || !data )
+            return;
+
+        // With a null destination box UpdateSubresource copies the whole buffer
+        // (MaxElementCount elements) out of data, reading past the end of a shorter
+        // source. Limit the copy to the bytes supplied; for buffers the box is
+        // measured in bytes along x.
+        D3D11_BOX box = {};
+        box.left = 0;
+        box.right = static_cast<UINT>(sizeof( T ) * count);
+        box.top = 0;
+        box.bottom = 1;
+        box.front = 0;
+        box.back = 1;
+        context->UpdateSubresource( Buffer.Get(), 0, &box, data, 0, 0 );
+    }
+
+    // Bind the SRV to a vertex shader resource slot
+    void BindToVertexShader( ID3D11DeviceContext* context, UINT slot ) {
+        context->VSSetShaderResources( slot, 1, SRV.GetAddressOf() );
+    }
+
+    // Bind the SRV to a pixel shader resource slot
+    void BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) {
+        context->PSSetShaderResources( slot, 1, SRV.GetAddressOf() );
+    }
+
+    // Clear a vertex shader resource slot
+    void UnbindFromVertexShader( ID3D11DeviceContext* context, UINT slot ) {
+        ID3D11ShaderResourceView* nullSRV = nullptr;
+        context->VSSetShaderResources( slot, 1, &nullSRV );
+    }
+
+    // Bind the SRV to a compute shader resource slot
+    void BindToComputeShader( ID3D11DeviceContext* context, UINT slot ) {
+        context->CSSetShaderResources( slot, 1, SRV.GetAddressOf() );
+    }
+
+    // Clear a compute shader resource slot
+    void UnbindFromComputeShader( ID3D11DeviceContext* context, UINT slot ) {
+        ID3D11ShaderResourceView* nullSRV = nullptr;
+        context->CSSetShaderResources( slot, 1, &nullSRV );
+    }
+
+    // Element count of the last update, and the capacity the buffer was created with
+    UINT GetElementCount() const { return ElementCount; }
+    UINT GetMaxElementCount() const { return MaxElementCount; }
+    // Non-owning accessors; each returns null until Init() has succeeded
+    ID3D11Buffer* GetBuffer() const { return Buffer.Get(); }
+    ID3D11ShaderResourceView* GetSRV() const { return SRV.Get(); }
+    ID3D11UnorderedAccessView*
GetUAV() const { return UAV.Get(); }
+
+private:
+    Microsoft::WRL::ComPtr Buffer;
+    Microsoft::WRL::ComPtr SRV;
+    Microsoft::WRL::ComPtr UAV;
+    UINT ElementCount;
+    UINT MaxElementCount;
+};
diff --git a/D3D11Engine/D3D11TextureAtlasManager.h b/D3D11Engine/D3D11TextureAtlasManager.h
new file mode 100644
index 00000000..832e7eda
--- /dev/null
+++ b/D3D11Engine/D3D11TextureAtlasManager.h
@@ -0,0 +1,274 @@
+#pragma once
+#include "pch.h"
+
+#include
+#include
+#include
+#include
+#include
+#include "ConstantBufferStructs.h"
+
+// Internal struct for bin packing
+struct PackItem {
+    int originalIndex;          // position of the texture in the caller's input list
+    UINT width;
+    UINT height;
+    UINT x, y, slice;           // placement inside the atlas: texel offset and array slice
+    ID3D11Texture2D* texture;   // non-owning source texture
+    D3D11_TEXTURE2D_DESC desc;  // description of the source texture
+};
+
+class TextureManager {
+private:
+    // Helper to align sizes for power-of-two mip boundaries.
+    // Rounds value up to the next multiple of alignment; the bit trick is only
+    // valid when alignment is a power of two.
+    static UINT Align( UINT value, UINT alignment ) {
+        return (value + alignment - 1) & ~(alignment - 1);
+    }
+
+    // Returns the block edge length in texels for a format: 4 for every
+    // block-compressed (BC1-BC7) format, including the TYPELESS and SNORM
+    // variants, and 1 for everything else.
+    static UINT GetBlockSize( DXGI_FORMAT fmt ) {
+        switch ( fmt ) {
+        case DXGI_FORMAT_BC1_TYPELESS: case DXGI_FORMAT_BC1_UNORM: case DXGI_FORMAT_BC1_UNORM_SRGB:
+        case DXGI_FORMAT_BC2_TYPELESS: case DXGI_FORMAT_BC2_UNORM: case DXGI_FORMAT_BC2_UNORM_SRGB:
+        case DXGI_FORMAT_BC3_TYPELESS: case DXGI_FORMAT_BC3_UNORM: case DXGI_FORMAT_BC3_UNORM_SRGB:
+        case DXGI_FORMAT_BC4_TYPELESS: case DXGI_FORMAT_BC4_UNORM: case DXGI_FORMAT_BC4_SNORM:
+        case DXGI_FORMAT_BC5_TYPELESS: case DXGI_FORMAT_BC5_UNORM: case DXGI_FORMAT_BC5_SNORM:
+        case DXGI_FORMAT_BC6H_TYPELESS: case DXGI_FORMAT_BC6H_UF16: case DXGI_FORMAT_BC6H_SF16:
+        case DXGI_FORMAT_BC7_TYPELESS: case DXGI_FORMAT_BC7_UNORM: case DXGI_FORMAT_BC7_UNORM_SRGB:
+            return 4;
+        default: return 1;
+        }
+    }
+
+    // Generates the mip levels that are missing from the source texture (item.desc.MipLevels < mipLevels)
+    // by decompressing the last source mip, running box-filter downsampling, re-compressing to
+    // atlasFormat, and uploading each new level into the atlas via a temporary immutable texture.
+ static void GenerateMissingMips( + ID3D11Device* device, ID3D11DeviceContext* context, + ID3D11Texture2D* atlasTextureArray, + const PackItem& item, DXGI_FORMAT atlasFormat, UINT mipLevels ) + { + // Capture the source texture to CPU memory (creates an internal staging copy) + DirectX::ScratchImage captured; + if ( FAILED( DirectX::CaptureTexture( device, context, item.texture, captured ) ) ) + return; + + // Grab the last available mip as the downsampling base + const DirectX::Image* lastMipImg = captured.GetImage( item.desc.MipLevels - 1, 0, 0 ); + if ( !lastMipImg ) return; + + // GenerateMipMaps requires uncompressed input — decompress BC textures first + DirectX::ScratchImage decompressed; + const DirectX::Image* baseImg = lastMipImg; + if ( DirectX::IsCompressed( lastMipImg->format ) ) { + if ( FAILED( DirectX::Decompress( *lastMipImg, DXGI_FORMAT_R8G8B8A8_UNORM, decompressed ) ) ) + return; + baseImg = decompressed.GetImage( 0, 0, 0 ); + } + + // Generate: level 0 = base (already copied to atlas), levels 1..N = the missing mips + UINT levelsToGen = mipLevels - item.desc.MipLevels + 1; + DirectX::ScratchImage mipChain; + if ( FAILED( DirectX::GenerateMipMaps( *baseImg, DirectX::TEX_FILTER_BOX, levelsToGen, mipChain ) ) ) + return; + + // Re-compress the generated levels back to the atlas BC format. + // Try GPU-accelerated compression first; fall back to CPU if unsupported. 
+ const DirectX::ScratchImage* finalChain = &mipChain; + DirectX::ScratchImage recompressed; + if ( DirectX::IsCompressed( atlasFormat ) ) { + HRESULT hr = DirectX::Compress( device, + mipChain.GetImages(), mipChain.GetImageCount(), mipChain.GetMetadata(), + atlasFormat, DirectX::TEX_COMPRESS_DEFAULT, DirectX::TEX_ALPHA_WEIGHT_DEFAULT, + recompressed ); + if ( FAILED( hr ) ) { + // GPU BC compression not supported on this hardware — use CPU path + recompressed = DirectX::ScratchImage{}; + if ( FAILED( DirectX::Compress( + mipChain.GetImages(), mipChain.GetImageCount(), mipChain.GetMetadata(), + atlasFormat, DirectX::TEX_COMPRESS_DEFAULT, DirectX::TEX_ALPHA_WEIGHT_DEFAULT, + recompressed ) ) ) + return; + } + finalChain = &recompressed; + } + + // Upload each new mip via a temporary immutable texture + CopySubresourceRegion + for ( UINT mip = item.desc.MipLevels; mip < mipLevels; ++mip ) { + // chainIdx 0 = the base (already in atlas), so start at 1 + UINT chainIdx = mip - item.desc.MipLevels + 1; + const DirectX::Image* src = finalChain->GetImage( chainIdx, 0, 0 ); + if ( !src || !src->pixels ) continue; + + D3D11_TEXTURE2D_DESC tmpDesc = {}; + tmpDesc.Width = (UINT)src->width; + tmpDesc.Height = (UINT)src->height; + tmpDesc.MipLevels = 1; + tmpDesc.ArraySize = 1; + tmpDesc.Format = src->format; + tmpDesc.SampleDesc.Count = 1; + tmpDesc.Usage = D3D11_USAGE_IMMUTABLE; + tmpDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + D3D11_SUBRESOURCE_DATA initData = {}; + initData.pSysMem = src->pixels; + initData.SysMemPitch = (UINT)src->rowPitch; + + ID3D11Texture2D* tmpTex = nullptr; + if ( SUCCEEDED( device->CreateTexture2D( &tmpDesc, &initData, &tmpTex ) ) ) { + UINT mipX = item.x >> mip; + UINT mipY = item.y >> mip; + UINT dstSub = D3D11CalcSubresource( mip, item.slice, mipLevels ); + D3D11_BOX box = { 0, 0, 0, (UINT)src->width, (UINT)src->height, 1 }; + context->CopySubresourceRegion( atlasTextureArray, dstSub, mipX, mipY, 0, tmpTex, 0, &box ); + tmpTex->Release(); + } + 
} + } + +public: + struct AtlasResult { + ID3D11Texture2D* atlasTextureArray = nullptr; + ID3D11ShaderResourceView* atlasSRV = nullptr; + std::vector descriptors; + + void Destroy() { + SAFE_RELEASE( atlasSRV ); + SAFE_RELEASE( atlasTextureArray ); + descriptors.clear(); + } + }; + + static AtlasResult CreateAtlasArray( ID3D11Device* device, ID3D11DeviceContext* context, + std::basic_string_view sourceTextures, + // const std::vector& sourceTextures, + UINT atlasSize = 2048, UINT mipLevels = 6 ) + { + if ( sourceTextures.empty() ) return {}; + + AtlasResult result; + result.descriptors.resize( sourceTextures.size() ); + + // Determine format from first texture for alignment calculation. + // For BC formats (blockSize=4), coordinates must remain block-aligned at every mip level. + D3D11_TEXTURE2D_DESC firstDesc; + sourceTextures[0]->GetDesc( &firstDesc ); + DXGI_FORMAT atlasFormat = firstDesc.Format; + + const UINT blockSize = GetBlockSize( atlasFormat ); + const UINT MipAlignment = blockSize * (1 << (mipLevels - 1)); + + std::vector items; + items.reserve( sourceTextures.size() ); + + // 1. Extract info and validate + for ( size_t i = 0; i < sourceTextures.size(); ++i ) { + D3D11_TEXTURE2D_DESC desc; + sourceTextures[i]->GetDesc( &desc ); + + if ( desc.Format != atlasFormat ) { + // For a Texture2DArray, all formats must match. + throw std::runtime_error( "All textures must have the same DXGI_FORMAT." ); + } + + items.push_back( { (int)i, desc.Width, desc.Height, 0, 0, 0, sourceTextures[i], desc}); + } + + // 2. Sort by height descending for optimal shelf-packing + std::sort( items.begin(), items.end(), []( const PackItem& a, const PackItem& b ) { + return a.height > b.height; + } ); + + // 3. 
CPU Bin Packing (Shelf Packing Algorithm) + UINT currentX = 0, currentY = 0, currentShelfHeight = 0, currentSlice = 0; + + for ( auto& item : items ) { + UINT alignedW = Align( item.width, MipAlignment ); + UINT alignedH = Align( item.height, MipAlignment ); + + // Move to next shelf if it doesn't fit horizontally + if ( currentX + alignedW > atlasSize ) { + currentX = 0; + currentY += Align( currentShelfHeight, MipAlignment ); + currentShelfHeight = 0; + } + + // Move to next array slice if it doesn't fit vertically + if ( currentY + alignedH > atlasSize ) { + currentSlice++; + currentX = 0; + currentY = 0; + currentShelfHeight = 0; + } + + item.x = currentX; + item.y = currentY; + item.slice = currentSlice; + + currentX += alignedW; + currentShelfHeight = std::max( currentShelfHeight, alignedH ); + } + + UINT totalSlices = currentSlice + 1; + + // 4. Create the target Texture2DArray + D3D11_TEXTURE2D_DESC arrayDesc = {}; + arrayDesc.Width = atlasSize; + arrayDesc.Height = atlasSize; + arrayDesc.MipLevels = mipLevels; + arrayDesc.ArraySize = totalSlices; + arrayDesc.Format = atlasFormat; + arrayDesc.SampleDesc.Count = 1; + arrayDesc.SampleDesc.Quality = 0; + arrayDesc.Usage = D3D11_USAGE_DEFAULT; + arrayDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + if ( FAILED( device->CreateTexture2D( &arrayDesc, nullptr, &result.atlasTextureArray ) ) ) { + throw std::runtime_error( "Failed to create Texture2DArray atlas." ); + } + + // 5. 
GPU CopySubresourceRegion (Extremely fast, zero CPU-readback) + + for ( const auto& item : items ) { + UINT maxMipsToCopy = std::min( item.desc.MipLevels, mipLevels ); + + for ( UINT mip = 0; mip < maxMipsToCopy; ++mip ) { + // Calculate scaled coordinates for the current mip level + UINT mipX = item.x >> mip; + UINT mipY = item.y >> mip; + + // Mip source & destination indices + UINT srcSub = D3D11CalcSubresource( mip, 0, item.desc.MipLevels ); + UINT dstSub = D3D11CalcSubresource( mip, item.slice, mipLevels ); + + context->CopySubresourceRegion( + result.atlasTextureArray, dstSub, + mipX, mipY, 0, + item.texture, srcSub, + nullptr // nullptr means copy the whole subresource + ); + } + + // 5b. Fill missing MIP levels using DirectXTex bilinear downsampling + re-compression. + if ( item.desc.MipLevels < mipLevels ) + GenerateMissingMips( device, context, result.atlasTextureArray, item, atlasFormat, mipLevels ); + + // Write out descriptors in the *original* input order + TextureDescriptor& outDesc = result.descriptors[item.originalIndex]; + outDesc.slice = item.slice; + outDesc.uStart = (float)item.x / atlasSize; + outDesc.vStart = (float)item.y / atlasSize; + outDesc.uEnd = (float)(item.x + item.width) / atlasSize; + outDesc.vEnd = (float)(item.y + item.height) / atlasSize; + } + + // 6. 
Create SRV + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = atlasFormat; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; + srvDesc.Texture2DArray.MostDetailedMip = 0; + srvDesc.Texture2DArray.MipLevels = mipLevels; + srvDesc.Texture2DArray.FirstArraySlice = 0; + srvDesc.Texture2DArray.ArraySize = totalSlices; + + device->CreateShaderResourceView( result.atlasTextureArray, &srvDesc, &result.atlasSRV); + + return result; + } +}; diff --git a/D3D11Engine/Frustum.h b/D3D11Engine/Frustum.h index 6834d571..6dc86ae3 100644 --- a/D3D11Engine/Frustum.h +++ b/D3D11Engine/Frustum.h @@ -59,6 +59,9 @@ class Frustum // Transform correctly to World Space viewSpaceFrustum.Transform( m_orientedBox, invView ); + + CacheOBBPlanes(); + m_useBoundingOrientedBox = true; m_useSphere = false; m_always_containing = false; @@ -72,7 +75,7 @@ class Frustum f.isValid = true; return f; } - + bool SupportsCulling() const { return !m_always_containing; } // Für perspektivische Projektion (normale Kamera) @@ -117,23 +120,63 @@ class Frustum if (m_useSphere) { return m_boundingSphere.Intersects(aabb); } - if (m_useBoundingOrientedBox) { - return m_orientedBox.Intersects(aabb); + + const float cx = aabb.Center.x; + const float cy = aabb.Center.y; + const float cz = aabb.Center.z; + const float ex = aabb.Extents.x; + const float ey = aabb.Extents.y; + const float ez = aabb.Extents.z; + + for ( int i = 0; i < 6; ++i ) { + const float nx = m_cachedPlanes[i].x; + const float ny = m_cachedPlanes[i].y; + const float nz = m_cachedPlanes[i].z; + const float w = m_cachedPlanes[i].w; + + // Distance from the AABB center to the plane + const float dist = nx * cx + ny * cy + nz * cz + w; + + // Projected radius of the AABB onto the plane's normal + const float projRadius = ex * std::abs( nx ) + ey * std::abs( ny ) + ez * std::abs( nz ); + + // If the center is further outside the plane than its projected radius, + // the entire box is disjoint. We can early-out immediately. 
+ if ( dist > projRadius ) { + return false; + } } - return m_frustum.Intersects(aabb); + + // If no separating plane was found, it must be intersecting or contained. + return true; } // Schneller Sphere-Test für VOBs - bool Intersects(const BoundingSphere& sphere) const { - if (m_always_containing) return true; + bool Intersects( const BoundingSphere& sphere ) const { + if ( m_always_containing ) return true; - if (m_useSphere) { - return m_boundingSphere.Intersects(sphere); + if ( m_useSphere ) { + return m_boundingSphere.Intersects( sphere ); } - if (m_useBoundingOrientedBox) { - return m_orientedBox.Intersects(sphere); + + const float cx = sphere.Center.x; + const float cy = sphere.Center.y; + const float cz = sphere.Center.z; + const float r = sphere.Radius; + + // Scalar early-out loop. + // For outward-facing planes, if distance > radius, it is completely outside. + for ( int i = 0; i < 6; ++i ) { + const float dist = m_cachedPlanes[i].x * cx + + m_cachedPlanes[i].y * cy + + m_cachedPlanes[i].z * cz + + m_cachedPlanes[i].w; + if ( dist > r ) { + return false; + } } - return m_frustum.Intersects(sphere); + + return true; } // Schneller AABB-Test @@ -142,22 +185,52 @@ class Frustum if (m_useSphere) { return m_boundingSphere.Contains(aabb); } - if (m_useBoundingOrientedBox) { - return m_orientedBox.Contains(aabb); + + const float cx = aabb.Center.x; + const float cy = aabb.Center.y; + const float cz = aabb.Center.z; + const float ex = aabb.Extents.x; + const float ey = aabb.Extents.y; + const float ez = aabb.Extents.z; + + bool intersects = false; + + for ( int i = 0; i < 6; ++i ) { + const float nx = m_cachedPlanes[i].x; + const float ny = m_cachedPlanes[i].y; + const float nz = m_cachedPlanes[i].z; + const float w = m_cachedPlanes[i].w; + + // 1. Calculate distance from the AABB center to the plane + const float dist = nx * cx + ny * cy + nz * cz + w; + + // 2. 
Calculate the projected radius of the AABB onto the plane's normal + const float projRadius = ex * std::abs( nx ) + ey * std::abs( ny ) + ez * std::abs( nz ); + + // 3. Since planes are OUTWARD facing: + if ( dist > projRadius ) { + return DirectX::ContainmentType::DISJOINT; // Completely outside + } + if ( dist > -projRadius ) { + intersects = true; // Partially inside, keep checking the other planes + } } - return aabb.ContainedBy( - XMLoadFloat4(&m_cachedPlanes[0]), - XMLoadFloat4(&m_cachedPlanes[1]), - XMLoadFloat4(&m_cachedPlanes[2]), - XMLoadFloat4(&m_cachedPlanes[3]), - XMLoadFloat4(&m_cachedPlanes[4]), - XMLoadFloat4(&m_cachedPlanes[5]) - ); + + return intersects ? DirectX::ContainmentType::INTERSECTS : DirectX::ContainmentType::CONTAINS; } bool Intersects( const zTBBox3D& aabb ) const { if ( m_always_containing ) return true; - return Intersects( BBoxFromzTBBox3D( aabb ) ); + // Fast scalar conversion - avoids memory->SIMD->memory roundtrip + BoundingBox bb; + bb.Center.x = (aabb.Min.x + aabb.Max.x) * 0.5f; + bb.Center.y = (aabb.Min.y + aabb.Max.y) * 0.5f; + bb.Center.z = (aabb.Min.z + aabb.Max.z) * 0.5f; + bb.Extents.x = (aabb.Max.x - aabb.Min.x) * 0.5f; + bb.Extents.y = (aabb.Max.y - aabb.Min.y) * 0.5f; + bb.Extents.z = (aabb.Max.z - aabb.Min.z) * 0.5f; + + return Intersects( bb ); } DirectX::ContainmentType Contains(const zTBBox3D& aabb) const { @@ -192,12 +265,14 @@ class Frustum return Contains(bb); } - static BoundingBox BBoxFromzTBBox3D(const zTBBox3D& box) { + static BoundingBox BBoxFromzTBBox3D(const zTBBox3D& aabb) { BoundingBox bb; - XMVECTOR bbMin = XMLoadFloat3(&box.Min); - XMVECTOR bbMax = XMLoadFloat3(&box.Max); - XMStoreFloat3(&bb.Center, XMVectorScale(XMVectorAdd(bbMin, bbMax), 0.5f)); - XMStoreFloat3(&bb.Extents, XMVectorScale(XMVectorSubtract(bbMax, bbMin), 0.5f)); + bb.Center.x = (aabb.Min.x + aabb.Max.x) * 0.5f; + bb.Center.y = (aabb.Min.y + aabb.Max.y) * 0.5f; + bb.Center.z = (aabb.Min.z + aabb.Max.z) * 0.5f; + bb.Extents.x = 
(aabb.Max.x - aabb.Min.x) * 0.5f; + bb.Extents.y = (aabb.Max.y - aabb.Min.y) * 0.5f; + bb.Extents.z = (aabb.Max.z - aabb.Min.z) * 0.5f; return bb; } @@ -231,29 +306,29 @@ class Frustum XMVECTOR vOrigin = XMLoadFloat3(&m_frustum.Origin); XMVECTOR vOrientation = XMLoadFloat4(&m_frustum.Orientation); - // Left plane - XMVECTOR plane = XMVectorSet(-1.0f, 0.0f, m_frustum.LeftSlope, 0.0f); + // Near plane + XMVECTOR plane = XMVectorSet(0.0f, 0.0f, -1.0f, m_frustum.Near); plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); XMStoreFloat4(&m_cachedPlanes[0], XMPlaneNormalize(plane)); + // Left plane + plane = XMVectorSet(-1.0f, 0.0f, m_frustum.LeftSlope, 0.0f); + plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); + XMStoreFloat4(&m_cachedPlanes[1], XMPlaneNormalize(plane)); + // Right plane plane = XMVectorSet(1.0f, 0.0f, -m_frustum.RightSlope, 0.0f); plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); - XMStoreFloat4(&m_cachedPlanes[1], XMPlaneNormalize(plane)); + XMStoreFloat4(&m_cachedPlanes[2], XMPlaneNormalize(plane)); // Bottom plane plane = XMVectorSet(0.0f, -1.0f, m_frustum.BottomSlope, 0.0f); plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); - XMStoreFloat4(&m_cachedPlanes[2], XMPlaneNormalize(plane)); + XMStoreFloat4(&m_cachedPlanes[3], XMPlaneNormalize(plane)); // Top plane plane = XMVectorSet(0.0f, 1.0f, -m_frustum.TopSlope, 0.0f); plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); - XMStoreFloat4(&m_cachedPlanes[3], XMPlaneNormalize(plane)); - - // Near plane - plane = XMVectorSet(0.0f, 0.0f, -1.0f, m_frustum.Near); - plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); XMStoreFloat4(&m_cachedPlanes[4], XMPlaneNormalize(plane)); // Far plane @@ -261,6 +336,47 @@ class Frustum plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); XMStoreFloat4(&m_cachedPlanes[5], 
XMPlaneNormalize(plane)); } + + // Cache world-space planes from an Oriented Bounding Box (Directional Light / Ortho) +// Plane order: [0]=Near, [1]=Left, [2]=Right, [3]=Bottom, [4]=Top, [5]=Far +void CacheOBBPlanes() { + XMVECTOR C = XMLoadFloat3(&m_orientedBox.Center); + XMVECTOR E = XMLoadFloat3(&m_orientedBox.Extents); + XMVECTOR Q = XMLoadFloat4(&m_orientedBox.Orientation); + + XMMATRIX R = XMMatrixRotationQuaternion(Q); + XMVECTOR AxisX = R.r[0]; + XMVECTOR AxisY = R.r[1]; + XMVECTOR AxisZ = R.r[2]; + + XMVECTOR Ex = XMVectorSplatX(E); + XMVECTOR Ey = XMVectorSplatY(E); + XMVECTOR Ez = XMVectorSplatZ(E); + + // Near face: Min Z boundary. Outward normal is -AxisZ + XMVECTOR P_Near = XMVectorSubtract( C, XMVectorMultiply( AxisZ, Ez ) ); + XMStoreFloat4( &m_cachedPlanes[0], XMPlaneFromPointNormal( P_Near, XMVectorNegate( AxisZ ) ) ); + + // Left face: Min X boundary. Outward normal is -AxisX + XMVECTOR P_Left = XMVectorSubtract( C, XMVectorMultiply( AxisX, Ex ) ); + XMStoreFloat4( &m_cachedPlanes[1], XMPlaneFromPointNormal( P_Left, XMVectorNegate( AxisX ) ) ); + + // Right face: Max X boundary. Outward normal is +AxisX + XMVECTOR P_Right = XMVectorAdd( C, XMVectorMultiply( AxisX, Ex ) ); + XMStoreFloat4( &m_cachedPlanes[2], XMPlaneFromPointNormal( P_Right, AxisX ) ); + + // Bottom face: Min Y boundary. Outward normal is -AxisY + XMVECTOR P_Bottom = XMVectorSubtract( C, XMVectorMultiply( AxisY, Ey ) ); + XMStoreFloat4( &m_cachedPlanes[3], XMPlaneFromPointNormal( P_Bottom, XMVectorNegate( AxisY ) ) ); + + // Top face: Max Y boundary. Outward normal is +AxisY + XMVECTOR P_Top = XMVectorAdd( C, XMVectorMultiply( AxisY, Ey ) ); + XMStoreFloat4( &m_cachedPlanes[4], XMPlaneFromPointNormal( P_Top, AxisY ) ); + + // Far face: Max Z boundary.
Outward normal is +AxisZ + XMVECTOR P_Far = XMVectorAdd( C, XMVectorMultiply( AxisZ, Ez ) ); + XMStoreFloat4( &m_cachedPlanes[5], XMPlaneFromPointNormal( P_Far, AxisZ ) ); +} private: // Helper to get frustum corners for AABB creation @@ -274,7 +390,7 @@ class Frustum BoundingSphere m_boundingSphere; BoundingOrientedBox m_orientedBox; - std::array m_cachedPlanes{}; // [0]=Left, [1]=Right, [2]=Bottom, [3]=Top, [4]=Near, [5]=Far + std::array m_cachedPlanes{}; bool m_useSphere = false; bool m_useBoundingOrientedBox = false; bool m_always_containing = false; diff --git a/D3D11Engine/GothicAPI.cpp b/D3D11Engine/GothicAPI.cpp index c19cbf4d..82d192ec 100644 --- a/D3D11Engine/GothicAPI.cpp +++ b/D3D11Engine/GothicAPI.cpp @@ -47,6 +47,7 @@ // TODO: REMOVE THIS! #include "D3D11GraphicsEngine.h" +#include "D3D11TextureAtlasManager.h" #ifndef PUBLIC_RELEASE #define OPT_DBG_NOINLINE __declspec(noinline) @@ -789,11 +790,11 @@ void GothicAPI::ResetVobs() { AnimatedSkeletalVobs.clear(); // Delete light vobs - for ( auto const& it : VobLightMap ) { + for ( auto const& it : VobLights_Sorted ) { Engine::GraphicsEngine->OnVobRemovedFromWorld( it.first ); delete it.second; } - VobLightMap.clear(); + VobLights_Sorted.clear(); } /** Called when the game loaded a new level */ @@ -877,12 +878,11 @@ void GothicAPI::OnWorldLoaded() { zCTree* vobTree = oCGame::GetGame()->_zCSession_world->GetGlobalVobTree(); TraverseVobTree( vobTree ); - // Build instancing cache for the static vobs for each section - BuildStaticMeshInstancingCache(); - // Build vob info cache for the bsp-leafs BuildBspVobMapCache(); + // Build instancing cache for the static vobs for each section + BuildStaticMeshInstancingCache(); #ifdef BUILD_GOTHIC_1_08k if ( LoadedWorldInfo->CustomWorldLoaded ) { CreatezCPolygonsForSections(); @@ -925,6 +925,7 @@ void GothicAPI::OnWorldLoaded() { #endif _canClearVobsByVisual = false; + Engine::GraphicsEngine->OnWorldLoaded(); } void GothicAPI::LoadRendererWorldSettings( 
GothicRendererSettings& s ) @@ -1947,7 +1948,7 @@ void GothicAPI::OnRemovedVob( zCVob* vob, zCWorld* world ) { SkeletalVobInfo* svi = SkeletalVobMap[vob]; // Tell all dynamic lights that we removed a vob they could have cached - for ( auto& vlit : VobLightMap ) { + for ( auto& vlit : VobLights_Sorted ) { if ( vi && vlit.second->LightShadowBuffers ) vlit.second->LightShadowBuffers->OnVobRemovedFromWorld( vi ); @@ -1955,7 +1956,12 @@ void GothicAPI::OnRemovedVob( zCVob* vob, zCWorld* world ) { vlit.second->LightShadowBuffers->OnVobRemovedFromWorld( svi ); } - VobLightInfo* li = VobLightMap[static_cast(vob)]; + VobLightInfo* li = nullptr; + { + auto lit = VobLights_Sorted.find( static_cast(vob) ); + if ( lit != VobLights_Sorted.end() ) + li = lit->second; + } // Erase it from the particle-effect list auto pit = std::find( ParticleEffectVobs.begin(), ParticleEffectVobs.end(), vob ); @@ -1971,7 +1977,7 @@ void GothicAPI::OnRemovedVob( zCVob* vob, zCWorld* world ) { } // Erase it from the list of lights - VobLightMap.erase( static_cast(vob) ); + VobLights_Sorted.erase( static_cast(vob) ); // Remove from BSP-Cache std::vector* nodes = nullptr; @@ -3886,7 +3892,7 @@ void GothicAPI::CollectVisibleVobs( ctx.drawDistances.OutdoorVobsSmall = RendererState.RendererSettings.OutdoorSmallVobDrawRadius; ctx.drawDistances.IndoorVobs = RendererState.RendererSettings.IndoorVobDrawRadius; ctx.drawDistances.VisualFX = RendererState.RendererSettings.VisualFXDrawRadius; - CollectVisibleVobs( ctx ); + CollectVisibleVobs( ctx, collectFlags ); if ( RendererState.RendererSettings.SortRenderQueue ) { struct SortableVob { @@ -3929,6 +3935,7 @@ void GothicAPI::CollectVisibleVobs( // they should be unique at this point. 
if ( collectFlags & COLLECT_MUTATE ) { + for ( auto it : renderQueue.vobs ) { VobInstanceInfo vii = {}; vii.world = it->WorldMatrix; @@ -4131,7 +4138,8 @@ static void CVVH_AddNotDrawnVobToList( BspTreeVobVisitor* visitor ) { const auto camPos = XMLoadFloat3( &ctx.cameraPosition ); - auto cullingEnabled = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.Culling.CullVobs; + auto cullingEnabled = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.Culling.CullVobs + && ctx.frustum.SupportsCulling(); auto distSq = dist * dist; for ( auto const& it : source ) { @@ -4164,7 +4172,8 @@ static void CVVH_AddNotDrawnVobToList( BspTreeVobVisitor* visitor) { const auto camPos = XMLoadFloat3( &ctx.cameraPosition ); - auto cullingEnabled = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.Culling.CullVobs; + auto cullingEnabled = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.Culling.CullVobs + && ctx.frustum.SupportsCulling(); auto vDistSq = XMVectorReplicate( dist * dist ); for ( auto const& it : source ) { @@ -4254,12 +4263,12 @@ void GothicAPI::BuildBspVobMapCacheHelper( zCBspBase* base ) { for ( int i = 0; i < leaf->LightVobList.NumInArray; i++ ) { zCVobLight* vob = leaf->LightVobList.Array[i]; - // Add the light to the map if not already done - auto vit = VobLightMap.find( vob ); - if ( vit == VobLightMap.end() ) { + // Add the light to the sorted vector if not already done + auto [vit, inserted] = VobLights_Sorted.insert( vob, nullptr ); + if ( inserted ) { VobLightInfo* vi = new VobLightInfo; vi->Vob = vob; - VobLightMap[vob] = vi; + vit->second = vi; float minDynamicUpdateLightRange = Engine::GAPI->GetRendererState().RendererSettings.MinLightShadowUpdateRange; if ( RendererState.RendererSettings.EnablePointlightShadows >= GothicRendererSettings::PLS_STATIC_ONLY @@ -5605,7 +5614,8 @@ static void CollectVisibleVobsHelper( BspInfo* base, const RndCullContext& ctx, BspTreeVobVisitor* visitor, DirectX::ContainmentType 
inheritedContainment, - float yMaxWorld + float yMaxWorld, + EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_NO_MUTATE ) { const float vobIndoorDist = ctx.drawDistances.IndoorVobs; const float vobOutdoorDist = ctx.drawDistances.OutdoorVobs; @@ -5613,15 +5623,15 @@ static void CollectVisibleVobsHelper( BspInfo* base, const float visualFXDrawRadius = ctx.drawDistances.VisualFX; const XMFLOAT3 camPos = ctx.cameraPosition; const FXMVECTOR cameraPosition = XMLoadFloat3( &camPos ); - EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_NO_MUTATE; int clipFlags = EGothicCullFlags::CullSidesNear; if ( ctx.stage == RenderStage::STAGE_DRAW_SHADOWS ) { - collectFlags = EBspTreeCollectFlags::COLLECT_VOBS; clipFlags = EGothicCullFlags::CullSidesNear; } + const bool checkDist = (collectFlags & COLLECT_DISABLE_CHECK_DIST) == 0; + const auto& RendererState = Engine::GAPI->GetRendererState(); - auto& VobLightMap = Engine::GAPI->VobLightMap; + auto& VobLights = Engine::GAPI->VobLights_Sorted; while ( base->OriginalNode ) { // Check for occlusion-culling if ( RendererState.RendererSettings.EnableOcclusionCulling && !base->OcclusionInfo.VisibleLastFrame ) { @@ -5633,7 +5643,9 @@ static void CollectVisibleVobsHelper( BspInfo* base, nodeYMax = std::max( nodeYMax, base->OriginalNode->BBox3D.Max.y ); nodeBox.Max.y = nodeYMax; - float dist = Toolbox::ComputePointAABBDistance( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ); + const float dist = checkDist + ? 
Toolbox::ComputePointAABBDistance( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ) + : 0; ContainmentType clipResult = inheritedContainment; if ( dist < vobOutdoorDist ) { if ( !RendererState.RendererSettings.EnableOcclusionCulling ) { @@ -5674,7 +5686,9 @@ static void CollectVisibleVobsHelper( BspInfo* base, std::vector& listC = base->Vobs; std::vector& listD = base->Mobs; - const float dist = Toolbox::ComputePointAABBDistance( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ); + const float dist = checkDist + ? Toolbox::ComputePointAABBDistance( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ) + : 0; if ( collectFlags & COLLECT_VOBS && RendererState.RendererSettings.DrawVOBs ) { @@ -5704,7 +5718,10 @@ static void CollectVisibleVobsHelper( BspInfo* base, for ( int i = 0; i < leaf->LightVobList.NumInArray; i++ ) { zCVobLight* vob = leaf->LightVobList.Array[i]; - const float lightCameraDist = XMVectorGetX( XMVector3Length( cameraPosition - vob->GetPositionWorldXM() ) ); + const float lightCameraDist = checkDist + ? XMVectorGetX( XMVector3Length( cameraPosition - vob->GetPositionWorldXM() ) ) + : 0; + if ( lightCameraDist + vob->GetLightRange() < visualFXDrawRadius ) { BoundingSphere lightSphere; @@ -5716,9 +5733,9 @@ static void CollectVisibleVobsHelper( BspInfo* base, continue; } - // Check if we already have this light - auto vit = VobLightMap.find( vob ); - if ( vit == VobLightMap.end() ) { + // Check if we already have this light, insert if new + auto [vit, inserted] = VobLights.insert( vob, nullptr ); + if ( inserted ) { bool PFXVobLight = false; if ( zCVob* parent = vob->GetVobParent() ) { if ( parent->As() ) { @@ -5726,12 +5743,12 @@ static void CollectVisibleVobsHelper( BspInfo* base, } } - // Add if not. 
This light must have been added during gameplay + // This light must have been added during gameplay VobLightInfo* vi = new VobLightInfo; vi->Vob = vob; vi->IsPFXVobLight = PFXVobLight; vi->UpdateShadows = !PFXVobLight; - vit = VobLightMap.emplace( vob, vi ).first; + vit->second = vi; // Create shadow-buffers for these lights since it was dynamically added to the world if ( !vi->IsPFXVobLight && RendererState.RendererSettings.EnablePointlightShadows >= GothicRendererSettings::PLS_STATIC_ONLY ) { @@ -5758,15 +5775,17 @@ static void CollectVisibleVobsHelper( BspInfo* base, boxCell.Max.y = node->BBox3D.Min.y; zTBBox3D tmpbox = boxCell; - float plane_normal; - XMStoreFloat( &plane_normal, XMVector3Dot( XMLoadFloat3( &node->Plane.Normal ), cameraPosition ) ); + float plane_normal = FLT_MAX; + if ( checkDist ) XMStoreFloat( &plane_normal, XMVector3Dot( XMLoadFloat3( &node->Plane.Normal ), cameraPosition ) ); + if ( plane_normal > node->Plane.Distance ) { if ( node->Front ) { reinterpret_cast(&tmpbox.Min)[planeAxis] = node->Plane.Distance; CollectVisibleVobsHelper( base->Front, tmpbox, ctx, visitor, clipResult, - yMaxWorld ); + yMaxWorld, + collectFlags); } reinterpret_cast(&boxCell.Max)[planeAxis] = node->Plane.Distance; @@ -5778,7 +5797,8 @@ static void CollectVisibleVobsHelper( BspInfo* base, CollectVisibleVobsHelper( base->Back, tmpbox, ctx, visitor, clipResult, - yMaxWorld ); + yMaxWorld, + collectFlags ); } reinterpret_cast(&boxCell.Min)[planeAxis] = node->Plane.Distance; @@ -5789,7 +5809,194 @@ static void CollectVisibleVobsHelper( BspInfo* base, } } -void GothicAPI::CollectVisibleVobs( const RndCullContext& ctx ) { +struct BspTraversalNode { + BspInfo* base; + zTBBox3D boxCell; + DirectX::ContainmentType inheritedContainment; +}; + +static void CollectVisibleVobsHelper2( BspInfo* base, + zTBBox3D boxCell, + const RndCullContext& ctx, + BspTreeVobVisitor* visitor, + DirectX::ContainmentType inheritedContainment, + float yMaxWorld, + EBspTreeCollectFlags 
collectFlags = EBspTreeCollectFlags::COLLECT_ALL_NO_MUTATE +) { + const float vobIndoorDist = ctx.drawDistances.IndoorVobs; + const float vobOutdoorDist = ctx.drawDistances.OutdoorVobs; + const float vobOutdoorSmallDist = ctx.drawDistances.OutdoorVobsSmall; + const float visualFXDrawRadius = ctx.drawDistances.VisualFX; + const XMFLOAT3 camPos = ctx.cameraPosition; + + const bool checkDist = (collectFlags & COLLECT_DISABLE_CHECK_DIST) == 0; + + // Cache globals outside the traversal loop to prevent redundant memory fetches + const auto& RendererState = Engine::GAPI->GetRendererState(); + auto& VobLights = Engine::GAPI->VobLights_Sorted; + + // Pre-allocate a small stack to eliminate recursion entirely + // 64 is exceptionally deep for a BSP tree, ensuring we won't overflow + BspTraversalNode stack[64]; + int stackPtr = 0; + + stack[stackPtr++] = { base, boxCell, inheritedContainment }; + + while ( stackPtr > 0 ) { + BspTraversalNode current = stack[--stackPtr]; + BspInfo* currBase = current.base; + zTBBox3D currBox = current.boxCell; + ContainmentType clipResult = current.inheritedContainment; + + // The original tail-recursion loop + while ( currBase && currBase->OriginalNode ) { + + if ( RendererState.RendererSettings.EnableOcclusionCulling && !currBase->OcclusionInfo.VisibleLastFrame ) { + break; // Proceed to next item in the stack + } + + zTBBox3D nodeBox = currBase->OriginalNode->BBox3D; + float nodeYMax = std::min( yMaxWorld, camPos.y ); + nodeYMax = std::max( nodeYMax, currBase->OriginalNode->BBox3D.Max.y ); + nodeBox.Max.y = nodeYMax; + + const float dist = checkDist + ? 
Toolbox::ComputePointAABBDistance( camPos, currBase->OriginalNode->BBox3D.Min, currBase->OriginalNode->BBox3D.Max ) + : 0; + + if ( dist < vobOutdoorDist ) { + if ( !RendererState.RendererSettings.EnableOcclusionCulling ) { + if ( clipResult != ContainmentType::CONTAINS ) { + clipResult = ctx.frustum.Contains( Frustum::BBoxFromzTBBox3D( nodeBox ) ); + } + } else { + switch ( static_cast( currBase->OcclusionInfo.LastCameraClipType ) ) { + case zTCam_ClipType::ZTCAM_CLIPTYPE_IN: clipResult = ContainmentType::CONTAINS; break; + case zTCam_ClipType::ZTCAM_CLIPTYPE_CROSSING: clipResult = ContainmentType::INTERSECTS; break; + case zTCam_ClipType::ZTCAM_CLIPTYPE_OUT: clipResult = ContainmentType::DISJOINT; break; + } + } + + if ( clipResult == ContainmentType::DISJOINT ) { + break; + } + } else { + break; // Too far + } + + if ( currBase->OriginalNode->IsLeaf() ) { + zCBspLeaf* leaf = static_cast(currBase->OriginalNode); + + if ( collectFlags & COLLECT_VOBS && RendererState.RendererSettings.DrawVOBs ) { + if ( collectFlags & COLLECT_INDOOR_VOBS && dist < vobIndoorDist ) { + CVVH_AddNotDrawnVobToList( currBase->IndoorVobs, vobIndoorDist, ctx, clipResult, visitor ); + } + if ( dist < vobOutdoorSmallDist ) { + CVVH_AddNotDrawnVobToList( currBase->SmallVobs, vobOutdoorSmallDist, ctx, clipResult, visitor ); + } + if ( dist < vobOutdoorDist ) { + CVVH_AddNotDrawnVobToList( currBase->Vobs, vobOutdoorDist, ctx, clipResult, visitor ); + } + } + + if ( collectFlags & COLLECT_MOBS && RendererState.RendererSettings.DrawMobs && dist < vobOutdoorSmallDist ) { + CVVH_AddNotDrawnVobToList( currBase->Mobs, vobOutdoorDist, ctx, clipResult, visitor ); + } + + if ( collectFlags & COLLECT_LIGHTS && RendererState.RendererSettings.EnableDynamicLighting && dist < visualFXDrawRadius ) { + for ( int i = 0; i < leaf->LightVobList.NumInArray; i++ ) { + zCVobLight* vob = leaf->LightVobList.Array[i]; + + // Avoid square root by using squared distances + bool inRange = false; + if ( checkDist ) { + 
float range = vob->GetLightRange(); + float threshold = visualFXDrawRadius - range; + + if ( threshold > 0.0f ) { + XMFLOAT3 vobPos = vob->GetPositionWorld(); + float dx = camPos.x - vobPos.x; + float dy = camPos.y - vobPos.y; + float dz = camPos.z - vobPos.z; + float distSq = dx * dx + dy * dy + dz * dz; + inRange = distSq < (threshold * threshold); + } + } else { + inRange = true; + } + + if ( inRange ) { + BoundingSphere lightSphere; + lightSphere.Center = vob->GetPositionWorld(); + lightSphere.Radius = vob->GetLightRange(); + + if ( clipResult != ContainmentType::CONTAINS && !ctx.frustum.Intersects( lightSphere ) ) { + continue; + } + + auto [vit, inserted] = VobLights.insert( vob, nullptr ); + if ( inserted ) { + bool PFXVobLight = false; + if ( zCVob* parent = vob->GetVobParent() ) { + if ( parent->As() ) PFXVobLight = true; + } + + VobLightInfo* vi = new VobLightInfo; + vi->Vob = vob; + vi->IsPFXVobLight = PFXVobLight; + vi->UpdateShadows = !PFXVobLight; + vit->second = vi; + + if ( !vi->IsPFXVobLight && RendererState.RendererSettings.EnablePointlightShadows >= GothicRendererSettings::PLS_STATIC_ONLY ) + Engine::GraphicsEngine->CreateShadowedPointLight( &vi->LightShadowBuffers, vi, true ); + } + + VobLightInfo* vi = vit->second; + if ( vi->VisibleInRenderPass ) continue; + visitor->Visit( vi ); + ctx.queue->PushLightVob( vi ); + } + } + } + break; // Break the inner tail-recursion loop to pop the next stack item + } else { + zCBspNode* node = static_cast(currBase->OriginalNode); + int planeAxis = node->PlaneSignbits; + + currBox.Min.y = node->BBox3D.Min.y; + currBox.Max.y = node->BBox3D.Max.y; + + zTBBox3D tmpbox = currBox; + float plane_normal = FLT_MAX; + + // Scalar math to avoid Load-Hit-Store SIMD stalls + if ( checkDist ) { + plane_normal = (node->Plane.Normal.x * camPos.x) + + (node->Plane.Normal.y * camPos.y) + + (node->Plane.Normal.z * camPos.z); + } + + if ( plane_normal > node->Plane.Distance ) { + if ( node->Front ) { + 
reinterpret_cast(&tmpbox.Min)[planeAxis] = node->Plane.Distance; + stack[stackPtr++] = { currBase->Front, tmpbox, clipResult }; + } + reinterpret_cast(&currBox.Max)[planeAxis] = node->Plane.Distance; + currBase = currBase->Back; + } else { + if ( node->Back ) { + reinterpret_cast(&tmpbox.Max)[planeAxis] = node->Plane.Distance; + stack[stackPtr++] = { currBase->Back, tmpbox, clipResult }; + } + reinterpret_cast(&currBox.Min)[planeAxis] = node->Plane.Distance; + currBase = currBase->Front; + } + } + } + } +} + +void GothicAPI::CollectVisibleVobs( const RndCullContext& ctx, EBspTreeCollectFlags collectFlags ) { zCBspTree* tree = LoadedWorldInfo->BspTree; zCBspBase* rootBsp = tree->GetRootNode(); @@ -5798,11 +6005,12 @@ void GothicAPI::CollectVisibleVobs( const RndCullContext& ctx ) { static thread_local BspTreeVobVisitor bspVobVisitor{}; // Recursively go through the tree and draw all nodes - CollectVisibleVobsHelper( root, root->OriginalNode->BBox3D, + CollectVisibleVobsHelper2( root, root->OriginalNode->BBox3D, ctx, &bspVobVisitor, ContainmentType::INTERSECTS, - Engine::GAPI->GetLoadedWorldInfo()->BspTree->GetRootNode()->BBox3D.Max.y + Engine::GAPI->GetLoadedWorldInfo()->BspTree->GetRootNode()->BBox3D.Max.y, + collectFlags ); FXMVECTOR camPos = XMLoadFloat3( &ctx.cameraPosition ); @@ -5816,7 +6024,8 @@ void GothicAPI::CollectVisibleVobs( const RndCullContext& ctx ) { std::list removeList; // TODO: This should not be needed! 
// Add visible dynamically added vobs - if ( RendererState.RendererSettings.DrawVOBs ) { + if ( RendererState.RendererSettings.DrawVOBs + && (collectFlags & EBspTreeCollectFlags::COLLECT_DYNAMIC_VOBS)) { float dist; for ( VobInfo* it : DynamicallyAddedVobs ) { if ( it->VisibleInRenderPass ) continue; diff --git a/D3D11Engine/GothicAPI.h b/D3D11Engine/GothicAPI.h index 0f4a1239..40b8d46b 100644 --- a/D3D11Engine/GothicAPI.h +++ b/D3D11Engine/GothicAPI.h @@ -11,6 +11,7 @@ #include "RenderQueue.h" #include "RenderToTextureBuffer.h" #include "ShaderIDs.h" +#include "StaticVOBCache.h" #define START_TIMING(x) TimerScope( x, &Engine::GAPI->GetRendererState().RendererInfo.Timing.frameRecordings ) @@ -49,15 +50,17 @@ struct RndCullContext { }; enum EBspTreeCollectFlags : unsigned int { - COLLECT_VOBS = 1 << 0, + COLLECT_VOBS = 1 << 0, // static vobs COLLECT_LIGHTS = 1 << 1, - COLLECT_MOBS = 1 << 2, - COLLECT_INDOOR_VOBS = 1 << 3, + COLLECT_MOBS = 1 << 2, // skeletal mobs + COLLECT_INDOOR_VOBS = 1 << 3, // indoor vobs + COLLECT_DYNAMIC_VOBS = 1 << 4, // dynamic static / transparent vobs - COLLECT_ALL_VOBS = COLLECT_VOBS | COLLECT_INDOOR_VOBS, + COLLECT_ALL_VOBS = COLLECT_VOBS | COLLECT_INDOOR_VOBS | COLLECT_DYNAMIC_VOBS, + COLLECT_DISABLE_CHECK_DIST = 1 << 29, COLLECT_MUTATE = 1 << 30, - COLLECT_ALL_MUTATE = 0xFFFFFFFF, + COLLECT_ALL_MUTATE = 0xFFFFFFFF & ~(COLLECT_DISABLE_CHECK_DIST), COLLECT_ALL_NO_MUTATE = COLLECT_ALL_MUTATE & ~COLLECT_MUTATE, }; @@ -225,6 +228,54 @@ class GVegetationBox; class zCMorphMesh; class zCDecal; +// Minimal flat-map: always-sorted vector of pairs for O(log n) binary-search lookups. +// All methods inline to the same lower_bound calls — zero overhead over hand-written code. 
+template +struct SortedPairVector { + using Entry = std::pair; + using iterator = typename std::vector::iterator; + using const_iterator = typename std::vector::const_iterator; + + iterator begin() { return m_data.begin(); } + iterator end() { return m_data.end(); } + const_iterator begin() const { return m_data.begin(); } + const_iterator end() const { return m_data.end(); } + + // Binary search (std::lower_bound) for key. Returns end() if not found. + __forceinline iterator find( Key key ) { + auto it = std::lower_bound( m_data.begin(), m_data.end(), key, Cmp{} ); + return (it != m_data.end() && it->first == key) ? it : m_data.end(); + } + + // Insert {key, value} at its sorted position (vector insert, so later entries shift). If key already exists, does nothing. + // Returns {iterator_to_element, true_if_newly_inserted}. + __forceinline std::pair insert( Key key, Value value ) { + auto it = std::lower_bound( m_data.begin(), m_data.end(), key, Cmp{} ); + if ( it != m_data.end() && it->first == key ) + return { it, false }; + return { m_data.insert( it, { key, value } ), true }; + } + + // Erase by key. Returns true if found and erased.
+ __forceinline bool erase( Key key ) { + auto it = find( key ); + if ( it == m_data.end() ) return false; + m_data.erase( it ); + return true; + } + + void clear() { m_data.clear(); } + bool empty() const { return m_data.empty(); } + size_t size() const { return m_data.size(); } + void reserve( size_t n ) { m_data.reserve( n ); } + +private: + struct Cmp { + bool operator()( const Entry& a, Key k ) const { return a.first < k; } + }; + std::vector m_data; +}; + class GothicAPI { public: GothicAPI(); @@ -569,7 +620,7 @@ class GothicAPI { EGothicCullFlags cullFlags = EGothicCullFlags::CullAll, EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_MUTATE); - void CollectVisibleVobs( const RndCullContext& ctx ); + void CollectVisibleVobs(const RndCullContext& ctx, EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_NO_MUTATE); /** Collects visible sections from the current camera perspective */ void CollectVisibleSections( std::vector& sections ); @@ -789,7 +840,6 @@ class GothicAPI { float GetSkyTimeScale(); static void ProcessVobAnimation( zCVob* vob, zTAnimationMode aniMode, VobInstanceInfo& vobInstance ); - private: /** Collects polygons in the given AABB */ void CollectPolygonsInAABBRec( BspInfo* base, const zTBBox3D& bbox, std::vector& list ); @@ -879,7 +929,8 @@ class GothicAPI { std::unordered_map VobMap; public: // temporarily, to allow CollectVisibleVobsHelper to be templated for inlining optimizations - phmap::flat_hash_map VobLightMap; + // Sorted by zCVobLight* for binary-search lookups + SortedPairVector VobLights_Sorted; private: phmap::flat_hash_map SkeletalVobMap; diff --git a/D3D11Engine/GothicGraphicsState.h b/D3D11Engine/GothicGraphicsState.h index c62b4820..6e23c260 100644 --- a/D3D11Engine/GothicGraphicsState.h +++ b/D3D11Engine/GothicGraphicsState.h @@ -640,6 +640,7 @@ struct GothicRendererSettings { WireframeVobs = false; WireframeWorld = false; DrawShadowGeometry = true; + UseIndirectVobShadows = false;
FixViewFrustum = false; DisableWatermark = true; DisableRendering = false; @@ -828,6 +829,7 @@ struct GothicRendererSettings { int ShadowCascadePCFLimit; E_ShadowFrustumCulling ShadowFrustumCullingMode; bool DrawShadowGeometry; + bool UseIndirectVobShadows; bool VegetationAlphaToCoverage; bool DisableWatermark; bool DisableRendering; diff --git a/D3D11Engine/ImGuiShim.cpp b/D3D11Engine/ImGuiShim.cpp index 4be20a31..6b480e98 100644 --- a/D3D11Engine/ImGuiShim.cpp +++ b/D3D11Engine/ImGuiShim.cpp @@ -1321,6 +1321,7 @@ void RenderAdvancedColumn2( GothicRendererSettings& settings, GothicAPI* gapi ) if (ImGui::BeginTabItem("Shadows", nullptr, ImGuiTabItemFlags_::ImGuiTabItemFlags_NoReorder)) { ImGui::Checkbox("Lazy update", &settings.DebugSettings.ShadowCascades.LazyCascadeUpdate ); ImGui::SetItemTooltip("Update last cascades less frequently to save performance, may cause uneven frametimes"); + ImGui::Checkbox("Indirect", &settings.UseIndirectVobShadows ); ImGui::SliderFloat("Extend Back", &settings.DebugSettings.ShadowCascades.ExtendBack, -10000, 50000, "%.0f"); ImGui::SliderFloat("Extend Front", &settings.DebugSettings.ShadowCascades.ExtendFront, -10000, 50000, "%.0f"); diff --git a/D3D11Engine/ShaderIDs.h b/D3D11Engine/ShaderIDs.h index 6d42ef02..a2debebc 100644 --- a/D3D11Engine/ShaderIDs.h +++ b/D3D11Engine/ShaderIDs.h @@ -17,6 +17,7 @@ enum class VShaderID : size_t { VS_XYZRHW_DIF_T1, VS_ExInstancedObj, VS_ExInstanced, + VS_ExInstancedObjIndirectAtlas, VS_GrassInstanced, VS_Lines, VS_Lines_XYZRHW, @@ -77,6 +78,8 @@ enum class PShaderID : size_t { PS_DiffuseAlphaTestShadows, PS_DiffuseNormalmappedAlphaTest, PS_DiffuseNormalmappedAlphaTestFxMap, + PS_DiffuseAtlas, + PS_DiffuseAtlasAlphaTest, PS_Preview_White, PS_Preview_Textured, PS_Preview_TexturedLit, @@ -109,5 +112,6 @@ enum class CShaderID : size_t { CS_AdvanceRain, CS_LightCulling, CS_TiledShading, + CS_CullVobs, COUNT }; diff --git a/D3D11Engine/Shaders/CS_CullVobs.hlsl b/D3D11Engine/Shaders/CS_CullVobs.hlsl 
new file mode 100644 index 00000000..929a9025 --- /dev/null +++ b/D3D11Engine/Shaders/CS_CullVobs.hlsl @@ -0,0 +1,130 @@ +//-------------------------------------------------------------------------------------- +// GPU Frustum + Distance Culling Compute Shader +// Tests each vob AABB against 6 frustum planes + draw distance, +// writes visible instances to RWStructuredBuffer and atomically +// increments InstanceCount in the indirect args buffer. +//-------------------------------------------------------------------------------------- + +cbuffer CullCB : register( b0 ) +{ + float4 frustumPlanes[6]; + float3 cameraPosition; + float drawDistance; + float globalWindStrength; + uint windAdvanced; + uint numVobs; + uint pad; +}; + +struct VobGPUData +{ + float3 aabbCenter; + float pad0; + float3 aabbExtent; + float pad1; + float4x4 world; + float4x4 prevWorld; + uint color; + float aniModeStrength; + float canBeAffectedByPlayer; + uint submeshStart; + uint submeshCount; + uint pad2[3]; +}; + +struct SubmeshGPUData +{ + int slice; + float uStart; + float vStart; + float uEnd; + float vEnd; + uint argIndex; + uint instanceBaseOffset; + uint pad; +}; + +struct VobInstanceInfoAtlas +{ + float4x4 world; + float4x4 prevWorld; + uint color; + float windStrength; + float canBeAffectedByPlayer; + int slice; + float uStart; + float vStart; + float uEnd; + float vEnd; +}; + +StructuredBuffer VobBuffer : register( t0 ); +StructuredBuffer SubmeshBuffer : register( t1 ); +RWStructuredBuffer InstanceOutput : register( u0 ); +RWByteAddressBuffer IndirectArgsUAV : register( u1 ); + +[numthreads( 64, 1, 1 )] +void CSMain( uint3 DTid : SV_DispatchThreadID ) +{ + uint idx = DTid.x; + if ( idx >= numVobs ) + return; + + VobGPUData vob = VobBuffer[idx]; + + // Draw distance cull (AABB center to camera; compared squared, so no sqrt is needed) + float3 toCamera = vob.aabbCenter - cameraPosition; + float distSq = dot( toCamera, toCamera ); + if ( distSq > drawDistance * drawDistance ) + return; + + // Frustum cull: 6-plane AABB
test + [unroll] + for ( int p = 0; p < 6; p++ ) + { + float3 n = frustumPlanes[p].xyz; + float d = frustumPlanes[p].w; + float r = dot( abs( n ), vob.aabbExtent ); + float s = dot( n, vob.aabbCenter ) + d; + if ( s - r > 0.0 ) + return; // fully outside this plane + } + + // Compute wind strength for this vob (stays 0 unless aniModeStrength > 0 and windAdvanced is set) + float windStr = 0.0; + if ( vob.aniModeStrength > 0.0 && windAdvanced ) + { + windStr = max( 0.1, vob.aniModeStrength ) * globalWindStrength; + } + + // Emit one instance per submesh of this vob + for ( uint s = 0; s < vob.submeshCount; s++ ) + { + SubmeshGPUData sm = SubmeshBuffer[vob.submeshStart + s]; + + // Atomic increment InstanceCount in the indirect args buffer. + // Each D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS is 20 bytes (5 x uint32): + // [0] IndexCountPerInstance + // [4] InstanceCount <-- we increment this + // [8] StartIndexLocation + // [12] BaseVertexLocation + // [16] StartInstanceLocation + uint slot; + IndirectArgsUAV.InterlockedAdd( sm.argIndex * 20 + 4, 1, slot ); + + // Write instance data at the pre-allocated offset + atomic slot + VobInstanceInfoAtlas inst; + inst.world = vob.world; + inst.prevWorld = vob.prevWorld; + inst.color = vob.color; + inst.windStrength = windStr; + inst.canBeAffectedByPlayer = vob.canBeAffectedByPlayer; + inst.slice = sm.slice; + inst.uStart = sm.uStart; + inst.vStart = sm.vStart; + inst.uEnd = sm.uEnd; + inst.vEnd = sm.vEnd; + + InstanceOutput[sm.instanceBaseOffset + slot] = inst; + } +} diff --git a/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl b/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl new file mode 100644 index 00000000..05c38975 --- /dev/null +++ b/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl @@ -0,0 +1,110 @@ +//-------------------------------------------------------------------------------------- +// Atlas pixel shader for static vobs +// Samples from Texture2DArray using (u, v, slice) from vertex shader +//-------------------------------------------------------------------------------------- +#include
+#include +#include +#include + +cbuffer MI_MaterialInfo : register( b2 ) +{ + float MI_SpecularIntensity; + float MI_SpecularPower; + float MI_NormalmapStrength; + float MI_ParallaxOcclusionStrength; + + float4 MI_Color; +} + +cbuffer DIST_Distance : register( b3 ) +{ + float DIST_DrawDistance; + float3 DIST_Pad; +} + +//-------------------------------------------------------------------------------------- +// Textures and Samplers +//-------------------------------------------------------------------------------------- +SamplerState SS_Linear : register( s0 ); +SamplerState SS_samMirror : register( s1 ); +Texture2DArray TX_AtlasArray : register( t0 ); +Texture2D TX_Texture1 : register( t1 ); +Texture2D TX_Texture2 : register( t2 ); +TextureCube TX_ReflectionCube : register( t4 ); + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct PS_INPUT +{ + float3 vTexcoord3D : TEXCOORD0; // (rawU, rawV, slice) + float2 vTexcoord2 : TEXCOORD1; + float4 vDiffuse : TEXCOORD2; + float4 vAtlasRect : TEXCOORD3; // (uStart, vStart, uEnd, vEnd) + float3 vNormalVS : TEXCOORD4; + float3 vViewPosition : TEXCOORD5; + float4 vCurrClipPos : TEXCOORD6; + float4 vPrevClipPos : TEXCOORD7; + float4 vPosition : SV_POSITION; +}; + +// Calculate screen-space velocity (prevUV - currUV) from clip positions; returns zero if either w is 0 +float2 CalculateVelocity(float4 currClipPos, float4 prevClipPos) +{ + if (currClipPos.w == 0.0 || prevClipPos.w == 0.0) + return float2(0, 0); + + float2 currNDC = currClipPos.xy / currClipPos.w; + float2 prevNDC = prevClipPos.xy / prevClipPos.w; + + float2 currUV = float2(currNDC.x * 0.5 + 0.5, 1.0 - (currNDC.y * 0.5 + 0.5)); + float2 prevUV = float2(prevNDC.x * 0.5 + 0.5, 1.0 - (prevNDC.y * 0.5 + 0.5)); + + return prevUV - currUV; +} + +//-------------------------------------------------------------------------------------- +// Pixel Shader
+//-------------------------------------------------------------------------------------- +DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET +{ + DEFERRED_PS_OUTPUT output; + output.vReactiveMask = 0.0f; + + // Per-pixel atlas UV remapping: avoids frac() interpolation collapse in the VS + // (frac(1.0)=0.0 in VS causes entire [0,1] UV range to collapse to a single texel). + // SampleGrad uses gradients from the raw (pre-frac) UVs so MIP selection stays correct + // even at UV wrap boundaries where frac() would create huge derivative discontinuities. + float2 rawUV = Input.vTexcoord3D.xy; + float slice = Input.vTexcoord3D.z; + float2 atlasScale = Input.vAtlasRect.zw - Input.vAtlasRect.xy; // (uEnd-uStart, vEnd-vStart) + + float2 gradX = ddx(rawUV) * atlasScale; + float2 gradY = ddy(rawUV) * atlasScale; + float2 atlasUV = Input.vAtlasRect.xy + frac(rawUV) * atlasScale; + + float4 color = TX_AtlasArray.SampleGrad(SS_Linear, float3(atlasUV, slice), gradX, gradY); + +#if ALPHATEST == 1 + ClipDistanceEffect(length(Input.vViewPosition), DIST_DrawDistance, color.r * 2 - 1, 500.0f); + DoAlphaTest(color.a); + output.vReactiveMask = 0.1f; +#endif + + float3 nrm = normalize(Input.vNormalVS); + + float4 fx = 1.0f; + + output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); + + output.vNrm.xyz = nrm; + output.vNrm.w = 1.0f; + + output.vSI_SP.x = MI_SpecularIntensity * fx.r; + output.vSI_SP.y = MI_SpecularPower * fx.g; + + output.vVelocity = CalculateVelocity(Input.vCurrClipPos, Input.vPrevClipPos); + + return output; +} diff --git a/D3D11Engine/Shaders/VS_ExInstancedObjIndirect.hlsl b/D3D11Engine/Shaders/VS_ExInstancedObjIndirect.hlsl new file mode 100644 index 00000000..0b063d50 --- /dev/null +++ b/D3D11Engine/Shaders/VS_ExInstancedObjIndirect.hlsl @@ -0,0 +1,217 @@ +//-------------------------------------------------------------------------------------- +// Instanced vertex shader; per-instance data is read from a StructuredBuffer by instance ID +//-------------------------------------------------------------------------------------- +
+#include "Globals_VS_ExConstants.h" + +cbuffer Matrices_PerFrame : register( b0 ) +{ + VS_ExConstantBuffer_PerFrame frame; +}; + +cbuffer WindParams : register(b1) +{ + float3 windDir; + float globalTime; + float minHeight; + float maxHeight; + float2 padding0; + float3 playerPos; + float padding1; +}; + +StructuredBuffer instances : register(t1); + +// Unpack DWORD color (R8G8B8A8_UNORM layout: R in the low byte, A in the high byte) to float4 in [0,1] +float4 UnpackColor(uint packed) +{ + return float4( + float(packed & 0xFF) / 255.0, + float((packed >> 8) & 0xFF) / 255.0, + float((packed >> 16) & 0xFF) / 255.0, + float((packed >> 24) & 0xFF) / 255.0 + ); +} + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float3 vPosition : POSITION; + float3 vNormal : NORMAL; + float2 vTex1 : TEXCOORD0; + float2 vTex2 : TEXCOORD1; + float4 vDiffuse : DIFFUSE; + + // The Input Assembler automatically adds StartInstanceLocation to this fetch!
+ uint instanceID : INSTANCE_ID; +}; + +struct VS_OUTPUT +{ + float2 vTexcoord : TEXCOORD0; + float2 vTexcoord2 : TEXCOORD1; + float4 vDiffuse : TEXCOORD2; + float3 vNormalVS : TEXCOORD4; + float3 vViewPosition : TEXCOORD5; + float4 vCurrClipPos : TEXCOORD6; // Current clip position for velocity + float4 vPrevClipPos : TEXCOORD7; // Previous clip position for velocity + + float4 vPosition : SV_POSITION; +}; + +#if SHD_WIND + +// Vertices below trunkStiffness (normalized height) stay completely still, like a tree trunk +static const float trunkStiffness = 0.12f; +static const float phaseVariation = 0.40f; +static const float windStrengMult = 16.0f; // original engine uses [0.1 -> 5] range, we use higher values in formulas +static const float PI_2 = 6.283185; // 2 * PI + +float GetInstancePhaseOffset(float4x4 objMatrix) +{ + // Random seed by object's matrix + // Combine object matrix and maxHeight for more stable randomness + float seed = dot(objMatrix._11_22_33, float3(12.9898, 78.233, 53.539)) + maxHeight; + return frac(sin(seed) * 43758.5453) * phaseVariation; +} + +float3 ApplyTreeWind(float3 vertexPos, float3 direction, float heightNorm, float timeSec, float4x4 instMatrix, float windStrength) +{ + // Calculate if vertex should be affected (1 if heightNorm >= trunkStiffness, 0 otherwise) + float shouldAffect = saturate(sign(heightNorm - trunkStiffness + 0.0001f)); + + float instancePhase = GetInstancePhaseOffset(instMatrix) * PI_2; + + // Smooth height factor with more natural falloff + float adjustedHeight = saturate((heightNorm - trunkStiffness) / (1.0 - trunkStiffness)) * shouldAffect; + float heightFactor = pow(adjustedHeight, 2.6f); + + // Main wave + float mainWave = sin(timeSec * 1.0 + heightNorm * 3.0 + instancePhase) * 0.8; + + // Second wave + float secondaryWave = cos(timeSec * 0.7 + heightNorm * 5.0 + instancePhase * 1.5) * 0.80; + + // Inertia + float inertiaEffect = sin(timeSec * 0.3 + heightNorm * 8.0) * 0.1; + + // Height amplitude + float topSmoothing =
smoothstep(0.7, 0.9, adjustedHeight); + + // Combine waves + float combinedWave = (mainWave + secondaryWave * 0.5) * (1.0 - topSmoothing * 0.3) + inertiaEffect * topSmoothing; + + // Chaotic motion (leaf turbulence), only applied near the top via topSmoothing + float leafTurbulence = (sin(timeSec * 4.0 + vertexPos.x * 15.0) + + cos(timeSec * 3.7 + vertexPos.z * 12.0)) * 0.05 * topSmoothing; + + // Final offset + float3 windOffset = direction * windStrength * windStrengMult * + (combinedWave + leafTurbulence) * heightFactor; + + return windOffset; +} +#endif + +#if SHD_INFLUENCE + +// Hero (player) influence constants +static const float heroAffectRange = 100.0f; +static const float heroAffectStrength = 38.0f; + +float3 CalculatePlayerInfluence( + float3 playerPos, + float3 vertexLocalPos, + float minHeight, + float maxHeight, + float4x4 instWorldMatrix +) +{ + float heightRange = max(maxHeight - minHeight, 0.001); + float vertexHeightNorm = saturate((vertexLocalPos.y - minHeight) / heightRange); + + // 15% of object height check + float heightMask = smoothstep(0.14, 0.16, vertexHeightNorm); + + float3 vertexWorldPos = mul(float4(vertexLocalPos, 1.0), instWorldMatrix).xyz; + float3 toVertex = vertexWorldPos - playerPos; + + float3 displaceDirWorld = lerp(float3(0, 1, 0), normalize(toVertex), step(0.001, length(toVertex))); + + float distanceXZ = length(toVertex.xz); + float distanceFactor = exp(-(distanceXZ*distanceXZ)/(1.8*heroAffectRange*heroAffectRange)); + + float influence = distanceFactor * vertexHeightNorm * heightMask; + + float randomOffset = frac(sin(dot(vertexLocalPos.xz, float2(12.9898, 78.233))) * 43758.5453); + influence *= 0.9 + 0.1 * randomOffset; + + float3 displaceDirLocal = normalize(mul(displaceDirWorld, (float3x3)instWorldMatrix)); + return displaceDirLocal * heroAffectStrength * influence; +} +#endif + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +VS_OUTPUT VSMain(
VS_INPUT Input ) +{ + VS_OUTPUT Output; + + // Base vertex position (local) + float3 position = Input.vPosition; + VobInstanceInfo instance = instances[Input.instanceID]; + +#if SHD_INFLUENCE + + if (instance.canBeAffectedByPlayer > 0) + { + // Displace the vertex by the player's proximity (e.g. bushes moved by the hero) + position += CalculatePlayerInfluence(playerPos, position, minHeight, maxHeight, instance.world); + } +#endif + +#if SHD_WIND + + if (instance.windStrenth > 0) + { + // WIND SHADER + // Guard against a zero height range before normalizing + float heightRange = max(maxHeight - minHeight, 0.001); + float vertexHeightNorm = saturate((Input.vPosition.y - minHeight) / heightRange); + + // Apply wind + position += ApplyTreeWind( + Input.vPosition, + normalize(windDir), + vertexHeightNorm, + globalTime, + instance.world, + instance.windStrenth + ); + } +#endif + + // Common processing for both cases + float3 worldPos = mul(float4(position, 1.0), instance.world).xyz; + + // Calculate previous world position for motion vectors + float3 prevWorldPos = mul(float4(position, 1.0), instance.prevWorld).xyz; + + Output.vPosition = mul(float4(worldPos, 1.0), frame.M_ViewProj); + Output.vTexcoord = Input.vTex1; + Output.vTexcoord2 = Input.vTex2; + Output.vDiffuse = UnpackColor(instance.color); + Output.vNormalVS = mul(Input.vNormal, mul((float3x3)instance.world, (float3x3)frame.M_View)); + Output.vViewPosition = mul(float4(worldPos, 1.0), frame.M_View); + + // Store clip positions for velocity calculation in pixel shader + // Use UNJITTERED matrices for correct velocity (jitter would cause incorrect motion) + Output.vCurrClipPos = mul(float4(worldPos, 1.0), frame.M_UnjitteredViewProj); + Output.vPrevClipPos = mul(float4(prevWorldPos, 1.0), frame.M_PrevViewProj); + + return Output; +} + diff --git a/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl b/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl new file mode 100644 index 00000000..3f365e91 --- /dev/null +++ b/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl @@ -0,0 +1,211 @@
+//-------------------------------------------------------------------------------------- +// Instanced vertex shader for atlas indirect draw path +// Uses StructuredBuffer for per-instance data including atlas UV rect +//-------------------------------------------------------------------------------------- + +#include "Globals_VS_ExConstants.h" + +cbuffer Matrices_PerFrame : register( b0 ) +{ + VS_ExConstantBuffer_PerFrame frame; +}; + +cbuffer WindParams : register(b1) +{ + float3 windDir; + float globalTime; + float minHeight; + float maxHeight; + float2 padding0; + float3 playerPos; + float padding1; +}; + +struct VobInstanceInfoAtlas { + float4x4 world; + float4x4 prevWorld; + uint color; + float windStrength; + float canBeAffectedByPlayer; + int slice; + float uStart; + float vStart; + float uEnd; + float vEnd; +}; + +StructuredBuffer instances : register(t1); + +// Unpack DWORD color (R8G8B8A8_UNORM layout: R in the low byte, A in the high byte) to float4 in [0,1] +float4 UnpackColor(uint packed) +{ + return float4( + float(packed & 0xFF) / 255.0, + float((packed >> 8) & 0xFF) / 255.0, + float((packed >> 16) & 0xFF) / 255.0, + float((packed >> 24) & 0xFF) / 255.0 + ); +} + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float3 vPosition : POSITION; + float3 vNormal : NORMAL; + float2 vTex1 : TEXCOORD0; + float2 vTex2 : TEXCOORD1; + float4 vDiffuse : DIFFUSE; + + // The Input Assembler automatically adds StartInstanceLocation to this fetch!
+ uint instanceID : INSTANCE_REMAP_INDEX; +}; + +struct VS_OUTPUT +{ + float3 vTexcoord3D : TEXCOORD0; // (rawU, rawV, slice) — raw UVs passed to PS for per-pixel atlas remap + float2 vTexcoord2 : TEXCOORD1; + float4 vDiffuse : TEXCOORD2; + float4 vAtlasRect : TEXCOORD3; // (uStart, vStart, uEnd, vEnd) — atlas sub-rect for PS remap + float3 vNormalVS : TEXCOORD4; + float3 vViewPosition : TEXCOORD5; + float4 vCurrClipPos : TEXCOORD6; + float4 vPrevClipPos : TEXCOORD7; + + float4 vPosition : SV_POSITION; +}; + +#if SHD_WIND + +// Vertices below trunkStiffness (normalized height) stay completely still, like a tree trunk +static const float trunkStiffness = 0.12f; +static const float phaseVariation = 0.40f; +static const float windStrengMult = 16.0f; +static const float PI_2 = 6.283185; + +float GetInstancePhaseOffset(float4x4 objMatrix) +{ + float seed = dot(objMatrix._11_22_33, float3(12.9898, 78.233, 53.539)) + maxHeight; + return frac(sin(seed) * 43758.5453) * phaseVariation; +} + +float3 ApplyTreeWind(float3 vertexPos, float3 direction, float heightNorm, float timeSec, float4x4 instMatrix, float windStrength) +{ + float shouldAffect = saturate(sign(heightNorm - trunkStiffness + 0.0001f)); + + float instancePhase = GetInstancePhaseOffset(instMatrix) * PI_2; + + float adjustedHeight = saturate((heightNorm - trunkStiffness) / (1.0 - trunkStiffness)) * shouldAffect; + float heightFactor = pow(adjustedHeight, 2.6f); + + float mainWave = sin(timeSec * 1.0 + heightNorm * 3.0 + instancePhase) * 0.8; + float secondaryWave = cos(timeSec * 0.7 + heightNorm * 5.0 + instancePhase * 1.5) * 0.80; + float inertiaEffect = sin(timeSec * 0.3 + heightNorm * 8.0) * 0.1; + + float topSmoothing = smoothstep(0.7, 0.9, adjustedHeight); + float combinedWave = (mainWave + secondaryWave * 0.5) * (1.0 - topSmoothing * 0.3) + inertiaEffect * topSmoothing; + + float leafTurbulence = (sin(timeSec * 4.0 + vertexPos.x * 15.0) + + cos(timeSec * 3.7 + vertexPos.z * 12.0)) * 0.05 * topSmoothing; + + float3 windOffset =
direction * windStrength * windStrengMult * + (combinedWave + leafTurbulence) * heightFactor; + + return windOffset; +} +#endif + +#if SHD_INFLUENCE + +static const float heroAffectRange = 100.0f; +static const float heroAffectStrength = 38.0f; + +float3 CalculatePlayerInfluence( + float3 playerPos, + float3 vertexLocalPos, + float minHeight, + float maxHeight, + float4x4 instWorldMatrix +) +{ + float heightRange = max(maxHeight - minHeight, 0.001); + float vertexHeightNorm = saturate((vertexLocalPos.y - minHeight) / heightRange); + + float heightMask = smoothstep(0.14, 0.16, vertexHeightNorm); + + float3 vertexWorldPos = mul(float4(vertexLocalPos, 1.0), instWorldMatrix).xyz; + float3 toVertex = vertexWorldPos - playerPos; + + float3 displaceDirWorld = lerp(float3(0, 1, 0), normalize(toVertex), step(0.001, length(toVertex))); + + float distanceXZ = length(toVertex.xz); + float distanceFactor = exp(-(distanceXZ*distanceXZ)/(1.8*heroAffectRange*heroAffectRange)); + + float influence = distanceFactor * vertexHeightNorm * heightMask; + + float randomOffset = frac(sin(dot(vertexLocalPos.xz, float2(12.9898, 78.233))) * 43758.5453); + influence *= 0.9 + 0.1 * randomOffset; + + float3 displaceDirLocal = normalize(mul(displaceDirWorld, (float3x3)instWorldMatrix)); + return displaceDirLocal * heroAffectStrength * influence; +} +#endif + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +VS_OUTPUT VSMain( VS_INPUT Input ) +{ + VS_OUTPUT Output; + + float3 position = Input.vPosition; + VobInstanceInfoAtlas inst = instances[Input.instanceID]; + +#if SHD_INFLUENCE + + if (inst.canBeAffectedByPlayer > 0) + { + position += CalculatePlayerInfluence(playerPos, position, minHeight, maxHeight, inst.world); + } +#endif + +#if SHD_WIND + + if (inst.windStrength > 0) + { + float heightRange = max(maxHeight - minHeight, 0.001); + float
vertexHeightNorm = saturate((Input.vPosition.y - minHeight) / heightRange); + + position += ApplyTreeWind( + Input.vPosition, + normalize(windDir), + vertexHeightNorm, + globalTime, + inst.world, + inst.windStrength + ); + } +#endif + + // World-space transform + float3 worldPos = mul(float4(position, 1.0), inst.world).xyz; + float3 prevWorldPos = mul(float4(position, 1.0), inst.prevWorld).xyz; + + Output.vPosition = mul(float4(worldPos, 1.0), frame.M_ViewProj); + + // Pass raw UVs + slice to PS; atlas remapping done per-pixel to avoid frac() interpolation artifacts + Output.vTexcoord3D = float3(Input.vTex1, (float)inst.slice); + Output.vAtlasRect = float4(inst.uStart, inst.vStart, inst.uEnd, inst.vEnd); + + Output.vTexcoord2 = Input.vTex2; + Output.vDiffuse = UnpackColor(inst.color); + Output.vNormalVS = mul(Input.vNormal, mul((float3x3)inst.world, (float3x3)frame.M_View)); + Output.vViewPosition = mul(float4(worldPos, 1.0), frame.M_View); + + // Motion vectors (unjittered) + Output.vCurrClipPos = mul(float4(worldPos, 1.0), frame.M_UnjitteredViewProj); + Output.vPrevClipPos = mul(float4(prevWorldPos, 1.0), frame.M_PrevViewProj); + + return Output; +} diff --git a/D3D11Engine/StaticVOBCache.cpp b/D3D11Engine/StaticVOBCache.cpp new file mode 100644 index 00000000..6aeb30a1 --- /dev/null +++ b/D3D11Engine/StaticVOBCache.cpp @@ -0,0 +1,151 @@ +#include "StaticVOBCache.h" +#include +#include "ConstantBufferStructs.h" +#include "WorldObjects.h" +#include "zCModel.h" +#include "zCMaterial.h" +#include + +using namespace DirectX; + +void StaticVOBCache::CullAndGatherStaticVOBs_AVX2( + const std::vector& batches, + const std::vector& instances, + const DirectX::XMFLOAT4 planes[6], + std::vector& outRenderQueue ) +{ + outRenderQueue.clear(); + // Pre-reserve to avoid reallocations + outRenderQueue.reserve( instances.size() * 2 ); + + const __m256 abs_mask = _mm256_castsi256_ps( _mm256_set1_epi32( 0x7FFFFFFF ) ); + const __m256 zero = _mm256_setzero_ps(); + + struct
alignas(32) SIMDPlane { + __m256 nx, ny, nz, d; + __m256 abs_nx, abs_ny, abs_nz; + }; + + SIMDPlane splanes[6]; + for ( int p = 0; p < 6; ++p ) { + splanes[p].nx = _mm256_set1_ps( planes[p].x ); + splanes[p].ny = _mm256_set1_ps( planes[p].y ); + splanes[p].nz = _mm256_set1_ps( planes[p].z ); + splanes[p].d = _mm256_set1_ps( planes[p].w ); + + splanes[p].abs_nx = _mm256_and_ps( splanes[p].nx, abs_mask ); + splanes[p].abs_ny = _mm256_and_ps( splanes[p].ny, abs_mask ); + splanes[p].abs_nz = _mm256_and_ps( splanes[p].nz, abs_mask ); + } + + for ( size_t i = 0; i < batches.size(); ++i ) { + const AABB_SoA_Batch8& batch = batches[i]; + + __m256 cx = _mm256_load_ps( batch.cx ); + __m256 cy = _mm256_load_ps( batch.cy ); + __m256 cz = _mm256_load_ps( batch.cz ); + __m256 ex = _mm256_load_ps( batch.ex ); + __m256 ey = _mm256_load_ps( batch.ey ); + __m256 ez = _mm256_load_ps( batch.ez ); + + __m256 v_mask = _mm256_castsi256_ps( _mm256_set1_epi32( 0xFFFFFFFF ) ); + + for ( int p = 0; p < 6; ++p ) { + __m256 nx = splanes[p].nx; + __m256 ny = splanes[p].ny; + __m256 nz = splanes[p].nz; + __m256 d = splanes[p].d; + + __m256 abs_nx = splanes[p].abs_nx; + __m256 abs_ny = splanes[p].abs_ny; + __m256 abs_nz = splanes[p].abs_nz; + + __m256 r = _mm256_mul_ps( ex, abs_nx ); + r = _mm256_fmadd_ps( ey, abs_ny, r ); + r = _mm256_fmadd_ps( ez, abs_nz, r ); + + __m256 dist = _mm256_fmadd_ps( cx, nx, d ); + dist = _mm256_fmadd_ps( cy, ny, dist ); + dist = _mm256_fmadd_ps( cz, nz, dist ); + + __m256 outside = _mm256_cmp_ps( _mm256_sub_ps( dist, r ), zero, _CMP_GT_OQ ); + v_mask = _mm256_andnot_ps( outside, v_mask ); + } + + uint32_t mask = _mm256_movemask_ps( v_mask ); + + // INSTANT SKIP: If mask is 0, all 8 items are outside the frustum.
+ if ( mask == 0 ) continue; + + // BIT SCAN: visit each set bit of the 8-lane visibility mask, lowest first + while ( mask != 0 ) { + // Find the index of the lowest set bit (0 to 7) + uint32_t bitIndex = _tzcnt_u32( mask ); + + // Calculate actual instance index (batch i covers instances i*8 .. i*8+7) + uint32_t instanceIdx = (i * 8) + bitIndex; + + // Push to dense render queue. NOTE(review): instances[instanceIdx] is not bounds-checked; presumably padding lanes of the last batch are built to fail the plane tests - confirm + outRenderQueue.push_back( { + instanceIdx, + reinterpret_cast(instances[instanceIdx]->VisualInfo), + } ); + + // Clear the lowest set bit so we can find the next one + // e.g., 010100 -> 010000 + mask &= (mask - 1); + } + } +} + +void StaticVOBCache::CullAndGatherStaticVOBs_DirectXMath( + const std::vector& batches, + const std::vector& instances, + const XMFLOAT4 planes[6], + std::vector& outRenderQueue ) +{ + outRenderQueue.clear(); + // Pre-reserve to avoid reallocations + outRenderQueue.reserve( instances.size() * 2 ); + + for ( size_t i = 0; i < batches.size(); ++i ) { + const AABB_SoA_Batch8& batch = batches[i]; + + // Process each of the 8 AABBs in this batch + for ( int j = 0; j < 8; ++j ) { + XMFLOAT3 center( batch.cx[j], batch.cy[j], batch.cz[j] ); + XMFLOAT3 extents( batch.ex[j], batch.ey[j], batch.ez[j] ); + + bool visible = true; + + // Test against all 6 frustum planes + for ( int p = 0; p < 6; ++p ) { + // Get absolute values of plane normal components + float abs_nx = std::abs( planes[p].x ); + float abs_ny = std::abs( planes[p].y ); + float abs_nz = std::abs( planes[p].z ); + + // Calculate the radius (projected extent along plane normal) + float r = extents.x * abs_nx + extents.y * abs_ny + extents.z * abs_nz; + + // Calculate distance from center to plane + float dist = center.x * planes[p].x + center.y * planes[p].y + center.z * planes[p].z + planes[p].w; + + // If dist - r > 0, box is completely outside this plane + if ( dist - r > 0.0f ) { + visible = false; + break; + } + } + + if ( visible ) { + uint32_t instanceIdx = static_cast(i * 8 + j); + + outRenderQueue.push_back( { + instanceIdx,
reinterpret_cast(instances[instanceIdx]->VisualInfo), + } ); + } + } + } +} diff --git a/D3D11Engine/StaticVOBCache.h b/D3D11Engine/StaticVOBCache.h new file mode 100644 index 00000000..f3db3824 --- /dev/null +++ b/D3D11Engine/StaticVOBCache.h @@ -0,0 +1,52 @@ +#pragma once + +#include +#include +#include +#include +#include + +// 1. The SoAoS Bounding Data (Aligned for AVX) +struct alignas(32) AABB_SoA_Batch8 { + float cx[8], cy[8], cz[8]; + float ex[8], ey[8], ez[8]; +}; + +// 3. The Dense Render Item (Output of the culler) +struct StaticVobRenderItem { + uint32_t instanceIndex; // index into an VobInfo* + struct MeshVisualInfo* mvi; +}; + +struct VobInfo; + +class StaticVOBCache +{ +public: + static void CullAndGatherStaticVOBs( + const std::vector& batches, + const std::vector& instances, + const DirectX::XMFLOAT4 planes[6], + std::vector& outRenderQueue ) { +#ifdef __AVX2__ + CullAndGatherStaticVOBs_AVX2( batches, instances, planes, outRenderQueue ); +#else + CullAndGatherStaticVOBs_DirectXMath( batches, instances, planes, outRenderQueue ); +#endif + } + +private: + static void CullAndGatherStaticVOBs_AVX2( + const std::vector& batches, + const std::vector& instances, + const DirectX::XMFLOAT4 planes[6], + std::vector& outRenderQueue ); + + // DirectXMath-based alternative for debugging/verification + static void CullAndGatherStaticVOBs_DirectXMath( + const std::vector& batches, + const std::vector& instances, + const DirectX::XMFLOAT4 planes[6], + std::vector& outRenderQueue ); +}; + diff --git a/D3D11Engine/packages.config b/D3D11Engine/packages.config index 44327afd..6c5d5b62 100644 --- a/D3D11Engine/packages.config +++ b/D3D11Engine/packages.config @@ -2,6 +2,7 @@ + \ No newline at end of file From e50ea6adff798604c8cc2621176b0e190c0faa71 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Thu, 5 Mar 2026 23:39:16 +0100 Subject: [PATCH 02/42] rename to VobCulling --- D3D11Engine/D3D11Engine.vcxproj | 4 ++-- 
D3D11Engine/D3D11Engine.vcxproj.filters | 6 ++++++ D3D11Engine/D3D11GraphicsEngine.cpp | 2 +- D3D11Engine/D3D11GraphicsEngine.h | 1 + D3D11Engine/GothicAPI.h | 1 - D3D11Engine/{StaticVOBCache.cpp => VobCulling.cpp} | 6 +++--- D3D11Engine/{StaticVOBCache.h => VobCulling.h} | 2 +- 7 files changed, 14 insertions(+), 8 deletions(-) rename D3D11Engine/{StaticVOBCache.cpp => VobCulling.cpp} (97%) rename D3D11Engine/{StaticVOBCache.h => VobCulling.h} (98%) diff --git a/D3D11Engine/D3D11Engine.vcxproj b/D3D11Engine/D3D11Engine.vcxproj index b50205f1..9eb80419 100644 --- a/D3D11Engine/D3D11Engine.vcxproj +++ b/D3D11Engine/D3D11Engine.vcxproj @@ -995,7 +995,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" - + @@ -1212,7 +1212,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" - + diff --git a/D3D11Engine/D3D11Engine.vcxproj.filters b/D3D11Engine/D3D11Engine.vcxproj.filters index 6ba362b6..fb3a3a79 100644 --- a/D3D11Engine/D3D11Engine.vcxproj.filters +++ b/D3D11Engine/D3D11Engine.vcxproj.filters @@ -842,6 +842,9 @@ Engine + + Engine + @@ -1146,6 +1149,9 @@ Engine + + Engine + diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 5d20b684..57bd1d5f 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -5453,7 +5453,7 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p // clear any residue of main render pass const auto& vobs = m_StaticVobs; std::vector outRenderQueue{}; - StaticVOBCache::CullAndGatherStaticVOBs( m_StaticVobsAABBs, vobs, currentFrustum.GetPlanes()._Elems, outRenderQueue ); + VobCulling::CullAndGatherStaticVOBs( m_StaticVobsAABBs, vobs, currentFrustum.GetPlanes()._Elems, outRenderQueue ); std::sort( outRenderQueue.begin(), outRenderQueue.end(), []( const StaticVobRenderItem& a, const StaticVobRenderItem& b ) { diff --git a/D3D11Engine/D3D11GraphicsEngine.h b/D3D11Engine/D3D11GraphicsEngine.h index d64ad629..3dde3006 
100644 --- a/D3D11Engine/D3D11GraphicsEngine.h +++ b/D3D11Engine/D3D11GraphicsEngine.h @@ -7,6 +7,7 @@ #include "D3D11TextureAtlasManager.h" #include "D3D11StructuredBuffer.h" #include "D3D11IndirectBuffer.h" +#include "VobCulling.h" struct RenderToDepthStencilBuffer; diff --git a/D3D11Engine/GothicAPI.h b/D3D11Engine/GothicAPI.h index 40b8d46b..50cf7b85 100644 --- a/D3D11Engine/GothicAPI.h +++ b/D3D11Engine/GothicAPI.h @@ -11,7 +11,6 @@ #include "RenderQueue.h" #include "RenderToTextureBuffer.h" #include "ShaderIDs.h" -#include "StaticVOBCache.h" #define START_TIMING(x) TimerScope( x, &Engine::GAPI->GetRendererState().RendererInfo.Timing.frameRecordings ) diff --git a/D3D11Engine/StaticVOBCache.cpp b/D3D11Engine/VobCulling.cpp similarity index 97% rename from D3D11Engine/StaticVOBCache.cpp rename to D3D11Engine/VobCulling.cpp index 6aeb30a1..db0b0e52 100644 --- a/D3D11Engine/StaticVOBCache.cpp +++ b/D3D11Engine/VobCulling.cpp @@ -1,4 +1,4 @@ -#include "StaticVOBCache.h" +#include "VobCulling.h" #include #include "ConstantBufferStructs.h" #include "WorldObjects.h" @@ -8,7 +8,7 @@ using namespace DirectX; -void StaticVOBCache::CullAndGatherStaticVOBs_AVX2( +void VobCulling::CullAndGatherStaticVOBs_AVX2( const std::vector& batches, const std::vector& instances, const DirectX::XMFLOAT4 planes[6], @@ -98,7 +98,7 @@ void StaticVOBCache::CullAndGatherStaticVOBs_AVX2( } } -void StaticVOBCache::CullAndGatherStaticVOBs_DirectXMath( +void VobCulling::CullAndGatherStaticVOBs_DirectXMath( const std::vector& batches, const std::vector& instances, const XMFLOAT4 planes[6], diff --git a/D3D11Engine/StaticVOBCache.h b/D3D11Engine/VobCulling.h similarity index 98% rename from D3D11Engine/StaticVOBCache.h rename to D3D11Engine/VobCulling.h index f3db3824..4eb9bae9 100644 --- a/D3D11Engine/StaticVOBCache.h +++ b/D3D11Engine/VobCulling.h @@ -20,7 +20,7 @@ struct StaticVobRenderItem { struct VobInfo; -class StaticVOBCache +class VobCulling { public: static void CullAndGatherStaticVOBs( 
From d371a9c2ab9d93b826dde5e37787835d394456c5 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Fri, 6 Mar 2026 11:25:38 +0100 Subject: [PATCH 03/42] resource streaming in and out of memory --- D3D11Engine/ConstantBufferStructs.h | 5 +- D3D11Engine/D3D11Engine.vcxproj | 2 + D3D11Engine/D3D11Engine.vcxproj.filters | 4 + D3D11Engine/D3D11GraphicsEngine.cpp | 257 ++- D3D11Engine/D3D11GraphicsEngine.h | 17 + .../D3D11StreamingResourcesManager.cpp | 1548 +++++++++++++++++ D3D11Engine/D3D11StreamingResourcesManager.h | 261 +++ D3D11Engine/GothicGraphicsState.h | 3 + D3D11Engine/Shaders/CS_CullVobs.hlsl | 18 +- .../VS_ExInstancedObjIndirectAtlas.hlsl | 1 + 10 files changed, 2109 insertions(+), 7 deletions(-) create mode 100644 D3D11Engine/D3D11StreamingResourcesManager.cpp create mode 100644 D3D11Engine/D3D11StreamingResourcesManager.h diff --git a/D3D11Engine/ConstantBufferStructs.h b/D3D11Engine/ConstantBufferStructs.h index e2de17d4..8fbc5655 100644 --- a/D3D11Engine/ConstantBufferStructs.h +++ b/D3D11Engine/ConstantBufferStructs.h @@ -28,6 +28,7 @@ struct VobInstanceInfoAtlas { float vStart; float uEnd; float vEnd; + UINT globalSourceIndex; // global source index into feedback texture }; // Descriptor returned for use with shader @@ -69,7 +70,7 @@ struct SubmeshGPUData { float uStart, vStart, uEnd, vEnd; UINT argIndex; // index into merged indirect args UINT instanceBaseOffset; // fixed write offset in instance buffer - UINT pad; + UINT globalSourceIndex; // global source index into feedback texture }; // Constant buffer for the GPU cull compute shader @@ -80,7 +81,7 @@ struct CullConstants { float globalWindStrength; UINT windAdvanced; UINT numVobs; - UINT pad; + UINT feedbackFrameNumber; // >0 = write feedback in CS; 0 = disabled (e.g. 
shadow pass) }; #pragma pack (push, 1) diff --git a/D3D11Engine/D3D11Engine.vcxproj b/D3D11Engine/D3D11Engine.vcxproj index 9eb80419..c8c0ef9a 100644 --- a/D3D11Engine/D3D11Engine.vcxproj +++ b/D3D11Engine/D3D11Engine.vcxproj @@ -851,6 +851,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -1109,6 +1110,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + diff --git a/D3D11Engine/D3D11Engine.vcxproj.filters b/D3D11Engine/D3D11Engine.vcxproj.filters index fb3a3a79..4625169d 100644 --- a/D3D11Engine/D3D11Engine.vcxproj.filters +++ b/D3D11Engine/D3D11Engine.vcxproj.filters @@ -846,6 +846,7 @@ Engine + @@ -877,6 +878,9 @@ Engine\D3D11 + + Engine\D3D11 + Engine\D3D11 diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 57bd1d5f..93b006f6 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -99,6 +99,7 @@ static std::unique_ptr agsDevice; extern bool userHaveAMDGPU; bool SupportTextureAtlases = false; +bool SupportStreamingResources = false; namespace { @@ -598,13 +599,23 @@ XRESULT D3D11GraphicsEngine::Init() { if (maxFeatureLevel >= D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0) { // check amount of GPU Memory available constexpr uint64_t GiB = 1024ull * 1024ull * 1024ull; - if ( adpDesc.DedicatedVideoMemory >= 4 * GiB ) { + if ( adpDesc.DedicatedVideoMemory >= 3 * GiB ) { // on 32 bit processes dx11 can't see more than 3GiB // currently we just assume everything fits into memory. // in the future we should make use of Tiled Resources, which would allow us // to support more memory intensive features, even on less than 4GB cards, by streaming in the necessary tiles. 
SupportTextureAtlases = true; Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows = SupportTextureAtlases; } + + // Check for tiled resource (streaming) support + SupportStreamingResources = D3D11StreamingResourcesManager::GetIsStreamingSupported( Device.Get() ); + Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.StreamingResourcesSupported = SupportStreamingResources; + if ( SupportStreamingResources ) { + LogInfo() << "Tiled Resources supported — streaming resource manager available"; + // Allow atlas path even on < 4GB cards when streaming is available + SupportTextureAtlases = true; + Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows = true; + } } LogInfo() << "Creating ShaderManager"; @@ -6238,6 +6249,18 @@ XRESULT D3D11GraphicsEngine::DrawVOBsIndirect( const Frustum& frustum, bool bind auto _ = RecordGraphicsEvent( L"DrawVOBsIndirect" ); + // --- 0. Update streaming tile mappings (only on main pass, not shadow) --- + if ( m_StreamingResources && bindPS ) { + // Read back feedback from 2 frames ago to determine which sources need loading + ReadBackFeedback(); + + XMFLOAT3 camPos = Engine::GAPI->GetCameraPosition(); + float drawDist = Engine::GAPI->GetRendererState().RendererSettings.OutdoorVobDrawRadius; + float currentTime = Engine::GAPI->GetTimeSeconds(); + const std::unordered_set* requested = m_RequestedSources.empty() ? nullptr : &m_RequestedSources; + m_StreamingResources->UpdateStreaming( camPos, drawDist, currentTime, requested ); + } + auto& context = GetContext(); // --- 1. Reset indirect args InstanceCounts via CopyResource from template --- @@ -6254,6 +6277,17 @@ XRESULT D3D11GraphicsEngine::DrawVOBsIndirect( const Frustum& frustum, bool bind cb.windAdvanced = (Engine::GAPI->GetRendererState().RendererSettings.WindQuality == GothicRendererSettings::EWindQuality::WIND_QUALITY_ADVANCED) ? 
1 : 0; cb.numVobs = static_cast(m_StaticVobs.size()); + + // Feedback: on main pass, increment frame number and tell CS to stamp visible sources. + // On shadow pass, feedbackFrameNumber = 0 disables feedback writes in the CS. + const bool useFeedback = bindPS && m_FeedbackTexture && m_FeedbackUAV && m_StreamingResources; + if ( useFeedback ) { + m_FeedbackFrameNumber++; + cb.feedbackFrameNumber = m_FeedbackFrameNumber; + } else { + cb.feedbackFrameNumber = 0; + } + m_CullConstantBuffer->UpdateBuffer( &cb ); m_CullConstantBuffer->BindToComputeShader( 0 ); @@ -6277,14 +6311,32 @@ XRESULT D3D11GraphicsEngine::DrawVOBsIndirect( const Frustum& frustum, bool bind }; context->CSSetUnorderedAccessViews( 0, 2, uavs, nullptr ); + // UAV u5 = Feedback texture (only on main pass) + if ( useFeedback ) { + ID3D11UnorderedAccessView* feedbackUAV = m_FeedbackUAV.Get(); + context->CSSetUnorderedAccessViews( 5, 1, &feedbackUAV, nullptr ); + } + UINT numGroups = (static_cast(m_StaticVobs.size()) + 63) / 64; context->Dispatch( numGroups, 1, 1 ); + // Copy feedback to staging ring for async readback (before unbinding UAVs) + if ( useFeedback ) { + context->CopyResource( + m_FeedbackStaging[m_FeedbackStagingHead % 3].Get(), + m_FeedbackTexture.Get() ); + m_FeedbackStagingHead++; + } + // Unbind CS resources ID3D11ShaderResourceView* nullSRV[2] = { nullptr, nullptr }; ID3D11UnorderedAccessView* nullUAV[2] = { nullptr, nullptr }; context->CSSetShaderResources( 0, 2, nullSRV ); context->CSSetUnorderedAccessViews( 0, 2, nullUAV, nullptr ); + if ( useFeedback ) { + ID3D11UnorderedAccessView* nullFeedbackUAV = nullptr; + context->CSSetUnorderedAccessViews( 5, 1, &nullFeedbackUAV, nullptr ); + } context->CSSetShader( nullptr, nullptr, 0 ); // --- 4. 
Bind global geometry (once) --- @@ -8210,6 +8262,31 @@ void D3D11GraphicsEngine::BuildGPUCullingBuffers() { std::vector mergedArgs; std::unordered_map> visualSubmeshMap; + // Build reverse-lookup: for each format, map (slice, x, y) -> source index + // so we can populate SubmeshGPUData.globalSourceIndex for GPU feedback. + struct SliceXYKey { + UINT slice, x, y; + bool operator==( const SliceXYKey& o ) const { return slice == o.slice && x == o.x && y == o.y; } + }; + struct SliceXYHash { + size_t operator()( const SliceXYKey& k ) const { + return std::hash{}( k.slice ) ^ ( std::hash{}( k.x ) << 11 ) ^ ( std::hash{}( k.y ) << 22 ); + } + }; + std::unordered_map> sourceIndexLookup; + std::unordered_map formatGlobalOffsets; // cached offsets for global index computation + if ( m_StreamingResources ) { + for ( auto& group : m_AtlasDrawGroups ) { + const auto& srcs = m_StreamingResources->GetSourceTextures( group.format ); + auto& lookup = sourceIndexLookup[group.format]; + UINT globalOffset = m_StreamingResources->GetGlobalSourceOffset( group.format ); + formatGlobalOffsets[group.format] = globalOffset; + for ( UINT i = 0; i < static_cast( srcs.size() ); i++ ) { + lookup[{ srcs[i].slice, srcs[i].x, srcs[i].y }] = i; + } + } + } + UINT runningInstanceOffset = 0; UINT globalArgIndex = 0; @@ -8240,6 +8317,27 @@ void D3D11GraphicsEngine::BuildGPUCullingBuffers() { smGPU.vEnd = submesh.atlasDesc.vEnd; smGPU.argIndex = globalArgIndex; smGPU.instanceBaseOffset = runningInstanceOffset; + + // Populate globalSourceIndex for GPU feedback by reverse-looking up pixel coords + smGPU.globalSourceIndex = 0; + if ( m_StreamingResources ) { + auto fmtIt = sourceIndexLookup.find( group.format ); + if ( fmtIt != sourceIndexLookup.end() ) { + // Convert normalized UVs back to pixel coords (atlasSize from texture desc) + D3D11_TEXTURE2D_DESC atlasTexDesc; + m_TextureAtlasses[group.format].atlasTextureArray->GetDesc( &atlasTexDesc ); + UINT px = static_cast( submesh.atlasDesc.uStart * 
atlasTexDesc.Width + 0.5f ); + UINT py = static_cast( submesh.atlasDesc.vStart * atlasTexDesc.Height + 0.5f ); + UINT sl = static_cast( submesh.atlasDesc.slice ); + auto srcIt = fmtIt->second.find( { sl, px, py } ); + if ( srcIt != fmtIt->second.end() ) { + // Store the global source index (format offset + local index) + UINT globalOffset = formatGlobalOffsets.count( group.format ) ? formatGlobalOffsets[group.format] : 0; + smGPU.globalSourceIndex = globalOffset + srcIt->second; + } + } + } + visualSubmeshMap[visual].push_back( smGPU ); runningInstanceOffset += maxInstances; @@ -8370,6 +8468,116 @@ void D3D11GraphicsEngine::BuildGPUCullingBuffers() { << m_TotalMaxInstances << " max instances"; } +void D3D11GraphicsEngine::CreateFeedbackBuffers() { + // Only create feedback infrastructure when streaming is active + if ( !m_StreamingResources ) + return; + + UINT totalSources = m_StreamingResources->GetTotalSourceCount(); + if ( totalSources == 0 ) + return; + + // Create the feedback texture: RWTexture2D of size (totalSources, 1) + D3D11_TEXTURE2D_DESC texDesc = {}; + texDesc.Width = totalSources; + texDesc.Height = 1; + texDesc.MipLevels = 1; + texDesc.ArraySize = 1; + texDesc.Format = DXGI_FORMAT_R32_UINT; + texDesc.SampleDesc.Count = 1; + texDesc.Usage = D3D11_USAGE_DEFAULT; + texDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS; + + HRESULT hr = GetDevice()->CreateTexture2D( &texDesc, nullptr, m_FeedbackTexture.ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Feedback] Failed to create feedback texture (" << totalSources << " sources)"; + return; + } + + // Create UAV for the feedback texture + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = DXGI_FORMAT_R32_UINT; + uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; + uavDesc.Texture2D.MipSlice = 0; + + hr = GetDevice()->CreateUnorderedAccessView( m_FeedbackTexture.Get(), &uavDesc, m_FeedbackUAV.ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Feedback] 
Failed to create feedback UAV"; + m_FeedbackTexture.Reset(); + return; + } + + // Clear feedback texture to zero so the first readback doesn't see garbage + const UINT clearValue[4] = { 0, 0, 0, 0 }; + GetContext()->ClearUnorderedAccessViewUint( m_FeedbackUAV.Get(), clearValue ); + + // Create 3 staging textures for async readback (2-frame latency ring) + for ( int i = 0; i < 3; i++ ) { + D3D11_TEXTURE2D_DESC stagingDesc = {}; + stagingDesc.Width = totalSources; + stagingDesc.Height = 1; + stagingDesc.MipLevels = 1; + stagingDesc.ArraySize = 1; + stagingDesc.Format = DXGI_FORMAT_R32_UINT; + stagingDesc.SampleDesc.Count = 1; + stagingDesc.Usage = D3D11_USAGE_STAGING; + stagingDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + + hr = GetDevice()->CreateTexture2D( &stagingDesc, nullptr, m_FeedbackStaging[i].ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Feedback] Failed to create staging texture " << i; + m_FeedbackTexture.Reset(); + m_FeedbackUAV.Reset(); + return; + } + } + + m_FeedbackStagingHead = 0; + m_FeedbackFrameNumber = 0; + m_RequestedSources.clear(); + + LogInfo() << "[Feedback] Created feedback buffers: " << totalSources << " sources"; +} + +void D3D11GraphicsEngine::ReadBackFeedback() { + if ( !m_FeedbackTexture || !m_StreamingResources ) + return; + + UINT totalSources = m_StreamingResources->GetTotalSourceCount(); + if ( totalSources == 0 ) + return; + + // We read back the staging buffer from 2 frames ago (to avoid GPU stalls) + // Only attempt readback once we have at least 3 frames of data + if ( m_FeedbackFrameNumber < 3 ) + return; + + UINT readIndex = ( m_FeedbackStagingHead + 1 ) % 3; // 2 frames behind current + + D3D11_MAPPED_SUBRESOURCE mapped = {}; + HRESULT hr = GetContext()->Map( m_FeedbackStaging[readIndex].Get(), 0, D3D11_MAP_READ, D3D11_MAP_FLAG_DO_NOT_WAIT, &mapped ); + if ( FAILED( hr ) ) { + // GPU hasn't finished yet — skip this frame's readback + return; + } + + m_RequestedSources.clear(); + const UINT* data = 
static_cast( mapped.pData ); + + // A source is "requested" if its feedback value is a recent frame number + // (within the last few frames). This provides hysteresis against single-frame flickers. + UINT threshold = ( m_FeedbackFrameNumber > 4 ) ? m_FeedbackFrameNumber - 4 : 1; // floor of 1, never 0: the texture is cleared to 0, so a zero entry means "never stamped" and must not count as requested + + for ( UINT i = 0; i < totalSources; i++ ) { + const auto value = data[i]; + if ( value >= threshold ) { + m_RequestedSources.insert( i ); + } + } + + GetContext()->Unmap( m_FeedbackStaging[readIndex].Get(), 0 ); +} + void D3D11GraphicsEngine::BuildSceneTextureAtlasses() { for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ ) { @@ -8378,6 +8586,20 @@ void D3D11GraphicsEngine::BuildSceneTextureAtlasses() { m_TextureAtlasLookup.clear(); m_AtlasDrawGroups.clear(); + // Clean up any previous streaming state + if ( m_StreamingResources ) { + m_StreamingResources->OnWorldUnloaded(); + } + + // Clean up feedback buffers + m_FeedbackTexture.Reset(); + m_FeedbackUAV.Reset(); + for ( int i = 0; i < 3; i++ ) + m_FeedbackStaging[i].Reset(); + m_FeedbackStagingHead = 0; + m_FeedbackFrameNumber = 0; + m_RequestedSources.clear(); + if ( !SupportTextureAtlases ) { return; } @@ -8423,6 +8645,23 @@ void D3D11GraphicsEngine::BuildSceneTextureAtlasses() { return a.Format < b.Format; } ); + // Determine if we should use the streaming (tiled resources) path + Engine::GAPI->GetRendererState().RendererSettings.EnableStreamingResources = SupportStreamingResources; + bool useStreaming = SupportStreamingResources + && Engine::GAPI->GetRendererState().RendererSettings.EnableStreamingResources; + + if ( useStreaming ) { + // Initialize streaming manager if not yet created + if ( !m_StreamingResources ) { + m_StreamingResources = std::make_unique(); + if ( !m_StreamingResources->Init( GetDevice().Get(), GetContext().Get() ) ) { + LogWarn() << "Streaming resources init failed, falling back to monolithic atlases"; + m_StreamingResources.reset(); + useStreaming = false; + } + } + } + // Create atlases per format group 
(process ALL groups including last) size_t rangeStart = 0; while ( rangeStart < uniqueTextures.size() ) { @@ -8437,7 +8676,15 @@ void D3D11GraphicsEngine::BuildSceneTextureAtlasses() { texPtrs.push_back( uniqueTextures[i].Texture2D.Get() ); std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); - auto atlas = TextureManager::CreateAtlasArray( GetDevice().Get(), GetContext().Get(), txView, 2048, 6 ); + + TextureManager::AtlasResult atlas; + if ( useStreaming && m_StreamingResources ) { + // Streaming path: tiled Texture2DArray backed by tile pool + atlas = m_StreamingResources->CreateStreamingAtlasArray( txView, 2048, 6 ); + } else { + // Monolithic path: fully committed Texture2DArray (original behavior) + atlas = TextureManager::CreateAtlasArray( GetDevice().Get(), GetContext().Get(), txView, 2048, 6 ); + } // Map descriptors back to Gothic texture pointers for ( size_t i = 0; i < texPtrs.size(); i++ ) { @@ -8451,7 +8698,8 @@ void D3D11GraphicsEngine::BuildSceneTextureAtlasses() { rangeStart = rangeEnd; } - LogInfo() << "Atlas: " << uniqueTextures.size() << " unique textures, " << m_TextureAtlasLookup.size() << " mapped"; + LogInfo() << "Atlas: " << uniqueTextures.size() << " unique textures, " << m_TextureAtlasLookup.size() << " mapped" + << ( useStreaming ? " (streaming)" : " (monolithic)" ); // Build global VB/IB and indirect args from atlas data BuildStaticGeometryBuffers(); @@ -8459,6 +8707,9 @@ void D3D11GraphicsEngine::BuildSceneTextureAtlasses() { // Build GPU structured buffers for compute shader culling // currently only used with static vobs when we do atlases. 
BuildGPUCullingBuffers(); + + // Create GPU feedback buffers for streaming (after atlas and culling buffers are ready) + CreateFeedbackBuffers(); } void D3D11GraphicsEngine::CacheWorldStaticVobs() { diff --git a/D3D11Engine/D3D11GraphicsEngine.h b/D3D11Engine/D3D11GraphicsEngine.h index 3dde3006..9347f8ec 100644 --- a/D3D11Engine/D3D11GraphicsEngine.h +++ b/D3D11Engine/D3D11GraphicsEngine.h @@ -7,6 +7,7 @@ #include "D3D11TextureAtlasManager.h" #include "D3D11StructuredBuffer.h" #include "D3D11IndirectBuffer.h" +#include "D3D11StreamingResourcesManager.h" #include "VobCulling.h" struct RenderToDepthStencilBuffer; @@ -534,4 +535,20 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { std::unique_ptr m_CullConstantBuffer; std::vector m_MergedArgsReset; // CPU-side template for reset UINT m_TotalMaxInstances = 0; + + /** Streaming resources manager (tiled resources) — opt-in, coexists with monolithic atlas */ + std::unique_ptr m_StreamingResources; + + /** GPU feedback for streaming: source-indexed RWTexture2D tracking which textures need loading */ + Microsoft::WRL::ComPtr m_FeedbackTexture; + Microsoft::WRL::ComPtr m_FeedbackUAV; + Microsoft::WRL::ComPtr m_FeedbackStaging[3]; // ring buffer for async readback + UINT m_FeedbackStagingHead = 0; + UINT m_FeedbackFrameNumber = 0; + std::unordered_set m_RequestedSources; // result of last readback + + /** Read back the feedback texture from 2 frames ago and populate m_RequestedSources */ + void ReadBackFeedback(); + /** Create feedback buffer infrastructure (called after atlas creation) */ + void CreateFeedbackBuffers(); }; diff --git a/D3D11Engine/D3D11StreamingResourcesManager.cpp b/D3D11Engine/D3D11StreamingResourcesManager.cpp new file mode 100644 index 00000000..2b8853f2 --- /dev/null +++ b/D3D11Engine/D3D11StreamingResourcesManager.cpp @@ -0,0 +1,1548 @@ +#include "pch.h" +#include "D3D11StreamingResourcesManager.h" +#include "Logger.h" + +#include +#include +#include + +using Microsoft::WRL::ComPtr; + 
+// ============================================================================= +// Lifecycle +// ============================================================================= + +D3D11StreamingResourcesManager::~D3D11StreamingResourcesManager() { + Shutdown(); +} + +bool D3D11StreamingResourcesManager::GetIsStreamingSupported( ID3D11Device1* device ) { + if ( !device ) + return false; + + ComPtr device2; + if ( FAILED( device->QueryInterface( IID_PPV_ARGS( &device2 ) ) ) ) + return false; + + D3D11_FEATURE_DATA_D3D11_OPTIONS1 options1 = {}; + if ( FAILED( device2->CheckFeatureSupport( + D3D11_FEATURE_D3D11_OPTIONS1, &options1, sizeof( options1 ) ) ) ) + return false; + + // Tier 1 gives us reserved textures + tile pool + UpdateTileMappings. + // NOTE(review): the check below nevertheless requires Tier 2 or higher, so Tier 1 devices are rejected - confirm this is intended. + return options1.TiledResourcesTier >= D3D11_TILED_RESOURCES_TIER_2; +} + +bool D3D11StreamingResourcesManager::Init( ID3D11Device1* device, ID3D11DeviceContext1* context ) { + if ( !device || !context ) + return false; + + // QueryInterface up to ID3D11Device2 / ID3D11DeviceContext2 + if ( FAILED( device->QueryInterface( IID_PPV_ARGS( &m_Device2 ) ) ) ) { + LogError() << "[StreamingResources] Failed to QueryInterface ID3D11Device2"; + return false; + } + + if ( FAILED( context->QueryInterface( IID_PPV_ARGS( &m_Context2 ) ) ) ) { + LogError() << "[StreamingResources] Failed to QueryInterface ID3D11DeviceContext2"; + return false; + } + + m_Device = device; + m_Context = context; + m_Initialized = true; + + LogInfo() << "[StreamingResources] Initialized successfully (Tiled Resources supported)"; + return true; +} + +void D3D11StreamingResourcesManager::Shutdown() { + OnWorldUnloaded(); + + m_Context2.Reset(); + m_Device2.Reset(); + m_Context.Reset(); + m_Device.Reset(); + m_Initialized = false; +} + +void D3D11StreamingResourcesManager::OnWorldUnloaded() { + // Clear all tile tracking + m_TileStates.clear(); + while ( !m_LoadQueue.empty() ) m_LoadQueue.pop(); + 
m_UnloadCandidates.clear(); + m_LoadedSources.clear(); + + // Release tiled atlas textures + m_TiledAtlases.clear(); + + // Release tile pools + m_TilePools.clear(); + m_DefaultTiles.clear(); + + // Release source texture references + m_SourceTextures.clear(); + + // Clear global source offset tracking + m_GlobalSourceOffsets.clear(); + m_TotalSourceCount = 0; + + // Reset staging ring + for ( auto& s : m_StagingRing ) { + s.texture.Reset(); + s.inUse = false; + } + m_StagingRingHead = 0; +} + +// ============================================================================= +// Feedback Query Methods +// ============================================================================= + +UINT D3D11StreamingResourcesManager::GetGlobalSourceOffset( DXGI_FORMAT fmt ) const { + auto it = m_GlobalSourceOffsets.find( fmt ); + return ( it != m_GlobalSourceOffsets.end() ) ? it->second : 0; +} + +UINT D3D11StreamingResourcesManager::GetTotalSourceCount() const { + return m_TotalSourceCount; +} + +const std::vector& +D3D11StreamingResourcesManager::GetSourceTextures( DXGI_FORMAT fmt ) const { + auto it = m_SourceTextures.find( fmt ); + if ( it != m_SourceTextures.end() ) + return it->second; + static const std::vector empty; + return empty; +} + +// ============================================================================= +// Key generation +// ============================================================================= + +uint64_t D3D11StreamingResourcesManager::MakeTileKey( + DXGI_FORMAT fmt, UINT subresource, UINT tileX, UINT tileY ) { + // Pack into 64 bits: [fmt:16][subresource:16][tileX:16][tileY:16] + return ( static_cast( fmt ) << 48 ) + | ( static_cast( subresource & 0xFFFF ) << 32 ) + | ( static_cast( tileX & 0xFFFF ) << 16 ) + | ( static_cast( tileY & 0xFFFF ) ); +} + +uint64_t D3D11StreamingResourcesManager::MakeSourceKey( + DXGI_FORMAT fmt, UINT sourceIndex, UINT mip ) { + // Pack into 64 bits: [fmt:16][sourceIndex:32][mip:16] + return ( static_cast( fmt ) << 
48 ) + | ( static_cast( sourceIndex ) << 16 ) + | ( static_cast( mip & 0xFFFF ) ); +} + +// ============================================================================= +// Tile Pool Management +// ============================================================================= + +bool D3D11StreamingResourcesManager::CreateTilePool( DXGI_FORMAT fmt, UINT numTiles ) { + D3D11_BUFFER_DESC poolDesc = {}; + poolDesc.ByteWidth = numTiles * TILE_SIZE_BYTES; + poolDesc.Usage = D3D11_USAGE_DEFAULT; + poolDesc.MiscFlags = D3D11_RESOURCE_MISC_TILE_POOL; + + TilePool pool; + pool.totalTiles = numTiles; + pool.usedTiles = 0; + + HRESULT hr = m_Device2->CreateBuffer( &poolDesc, nullptr, pool.buffer.GetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[StreamingResources] Failed to create tile pool for format " + << static_cast( fmt ) << " (hr=" << hr << ")"; + return false; + } + + m_TilePools[fmt] = std::move( pool ); + + LogInfo() << "[StreamingResources] Created tile pool: " << numTiles << " tiles (" + << ( numTiles * TILE_SIZE_BYTES / ( 1024 * 1024 ) ) << " MB) for format " + << static_cast( fmt ); + return true; +} + +void D3D11StreamingResourcesManager::GrowTilePool( DXGI_FORMAT fmt, UINT additionalTiles ) { + auto it = m_TilePools.find( fmt ); + if ( it == m_TilePools.end() ) + return; + + TilePool& pool = it->second; + UINT newTotal = pool.totalTiles + additionalTiles; + UINT64 newSizeBytes = static_cast( newTotal ) * TILE_SIZE_BYTES; + + // ID3D11DeviceContext2::ResizeTilePool resizes the pool buffer in-place. + // Existing tile data is preserved; new tiles are appended. 
+ HRESULT hr = m_Context2->ResizeTilePool( pool.buffer.Get(), newSizeBytes ); + if ( FAILED( hr ) ) { + LogWarn() << "[StreamingResources] Failed to grow tile pool for format " + << static_cast( fmt ) << " (hr=" << hr << ")"; + return; + } + + pool.totalTiles = newTotal; + + LogInfo() << "[StreamingResources] Grew tile pool to " << newTotal << " tiles (" + << ( newTotal * TILE_SIZE_BYTES / ( 1024 * 1024 ) ) << " MB) for format " + << static_cast( fmt ); +} + +UINT D3D11StreamingResourcesManager::AllocateTile( DXGI_FORMAT fmt ) { + auto it = m_TilePools.find( fmt ); + if ( it == m_TilePools.end() ) + return UINT_MAX; + + TilePool& pool = it->second; + + // Prefer recycled tiles + if ( !pool.freeTiles.empty() ) { + UINT idx = pool.freeTiles.back(); + pool.freeTiles.pop_back(); + return idx; + } + + // Allocate from high-water mark + if ( pool.usedTiles < pool.totalTiles ) { + return pool.usedTiles++; + } + + // Pool exhausted — grow it + UINT growth = std::max( pool.totalTiles / 2, 64 ); + GrowTilePool( fmt, growth ); + + if ( pool.usedTiles < pool.totalTiles ) { + return pool.usedTiles++; + } + + LogError() << "[StreamingResources] Tile pool exhausted and growth failed for format " + << static_cast( fmt ); + return UINT_MAX; +} + +void D3D11StreamingResourcesManager::FreeTile( DXGI_FORMAT fmt, UINT tileIndex ) { + auto it = m_TilePools.find( fmt ); + if ( it == m_TilePools.end() ) + return; + + it->second.freeTiles.push_back( tileIndex ); +} + +// ============================================================================= +// Default Tile +// ============================================================================= + +void D3D11StreamingResourcesManager::FillDefaultTileData( + DXGI_FORMAT fmt, std::vector& outData ) { + outData.resize( TILE_SIZE_BYTES ); + + switch ( fmt ) { + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: { + // BC1 block: 8 bytes per 4x4 pixel block + // Magenta = (255, 0, 255) encoded as two 16-bit RGB565 endpoints + // 
RGB565: R=31, G=0, B=31 → 0xF81F + uint8_t block[8] = {}; + uint16_t color = 0xF81F; // magenta in RGB565 + memcpy( &block[0], &color, 2 ); // color0 + memcpy( &block[2], &color, 2 ); // color1 + // Indices: all 0 (use color0) → block[4..7] = 0x00 + block[4] = 0x00; block[5] = 0x00; block[6] = 0x00; block[7] = 0x00; + + for ( UINT i = 0; i < TILE_SIZE_BYTES; i += 8 ) { + memcpy( outData.data() + i, block, 8 ); + } + break; + } + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: { + // BC2 block: 16 bytes (8 alpha + 8 color) + // Transparent-black: alpha = 0 so DoAlphaTest() clips these pixels, + // preventing magenta seams on alpha-tested geometry (trees, fences). + uint8_t block[16] = {}; + // Alpha: all 0x00 (fully transparent, 4 bits per pixel, 16 pixels) + memset( &block[0], 0x00, 8 ); + // Color: black (RGB565 = 0x0000) + uint16_t color = 0x0000; + memcpy( &block[8], &color, 2 ); + memcpy( &block[10], &color, 2 ); + block[12] = 0x00; block[13] = 0x00; block[14] = 0x00; block[15] = 0x00; + + for ( UINT i = 0; i < TILE_SIZE_BYTES; i += 16 ) { + memcpy( outData.data() + i, block, 16 ); + } + break; + } + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: { + // BC3 block: 16 bytes (8 alpha + 8 color) + // Transparent-black: alpha = 0 so DoAlphaTest() clips these pixels, + // preventing magenta seams on alpha-tested geometry (trees, fences). 
+ uint8_t block[16] = {}; + // Alpha: alpha0=0x00, alpha1=0x00, indices all 0 → all pixels = 0x00 + block[0] = 0x00; // alpha0 + block[1] = 0x00; // alpha1 + // Alpha indices: all 0 → bytes 2..7 = 0 + block[2] = 0x00; block[3] = 0x00; block[4] = 0x00; + block[5] = 0x00; block[6] = 0x00; block[7] = 0x00; + // Color: black (RGB565 = 0x0000) + uint16_t color = 0x0000; + memcpy( &block[8], &color, 2 ); + memcpy( &block[10], &color, 2 ); + block[12] = 0x00; block[13] = 0x00; block[14] = 0x00; block[15] = 0x00; + + for ( UINT i = 0; i < TILE_SIZE_BYTES; i += 16 ) { + memcpy( outData.data() + i, block, 16 ); + } + break; + } + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: { + // BGRA: B=0xFF, G=0x00, R=0xFF, A=0xFF → magenta + for ( UINT i = 0; i < TILE_SIZE_BYTES; i += 4 ) { + outData[i + 0] = 0xFF; // B + outData[i + 1] = 0x00; // G + outData[i + 2] = 0xFF; // R + outData[i + 3] = 0xFF; // A + } + break; + } + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: { + // RGBA: R=0xFF, G=0x00, B=0xFF, A=0xFF → magenta + for ( UINT i = 0; i < TILE_SIZE_BYTES; i += 4 ) { + outData[i + 0] = 0xFF; // R + outData[i + 1] = 0x00; // G + outData[i + 2] = 0xFF; // B + outData[i + 3] = 0xFF; // A + } + break; + } + default: + // For unknown formats, fill with 0xFF pattern + memset( outData.data(), 0xFF, TILE_SIZE_BYTES ); + break; + } +} + +void D3D11StreamingResourcesManager::InitDefaultTile( DXGI_FORMAT fmt ) { + auto poolIt = m_TilePools.find( fmt ); + if ( poolIt == m_TilePools.end() ) + return; + + // Reserve tile 0 as the default tile + TilePool& pool = poolIt->second; + if ( pool.usedTiles == 0 ) + pool.usedTiles = 1; // tile 0 is reserved + + // Generate magenta fill data + std::vector tileData; + FillDefaultTileData( fmt, tileData ); + + // Upload default tile data to the tile pool at index 0. + // We use UpdateTileMappings to map a temporary tiled texture tile to pool[0], + // then write the data. 
But since we can't write directly to a tile pool, + // we'll write via the atlas itself after mapping. + // + // For the initial setup, we write the default tile data via a staging texture + // and CopySubresourceRegion to a temporary tile-mapped region. + // Alternatively, we can use ID3D11DeviceContext2::UpdateTiles. + auto atlasIt = m_TiledAtlases.find( fmt ); + if ( atlasIt == m_TiledAtlases.end() ) + return; + + const TiledAtlas& atlas = atlasIt->second; + + // First, map tile (0,0) of the coarsest mip to pool tile 0 + D3D11_TILED_RESOURCE_COORDINATE coord = {}; + coord.Subresource = D3D11CalcSubresource( atlas.mipLevels - 1, 0, atlas.mipLevels ); + coord.X = 0; + coord.Y = 0; + coord.Z = 0; + + D3D11_TILE_REGION_SIZE regionSize = {}; + regionSize.NumTiles = 1; + regionSize.bUseBox = FALSE; + + UINT poolOffset = 0; // tile index 0 + + m_Context2->UpdateTileMappings( + atlas.texture.Get(), + 1, // numRegions + &coord, + ®ionSize, + pool.buffer.Get(), + 1, // numRanges + nullptr, // rangeFlags (nullptr = default = use tile pool offsets) + &poolOffset, + nullptr, // rangeTileCounts (nullptr with single tile) + 0 // flags + ); + + // Now use UpdateTiles to write the data directly to the mapped tile + D3D11_TILED_RESOURCE_COORDINATE updateCoord = coord; + D3D11_TILE_REGION_SIZE updateRegion = regionSize; + + m_Context2->UpdateTiles( + atlas.texture.Get(), + &updateCoord, + &updateRegion, + tileData.data(), + 0 // flags + ); + + DefaultTile& dt = m_DefaultTiles[fmt]; + dt.poolIndex = 0; + dt.initialized = true; + + LogInfo() << "[StreamingResources] Default magenta tile initialized for format " + << static_cast( fmt ); +} + +// ============================================================================= +// Tile Mapping +// ============================================================================= + +void D3D11StreamingResourcesManager::MapTileToDefault( + const TiledAtlas& atlas, UINT subresource, + UINT tileX, UINT tileY, DXGI_FORMAT fmt ) { + + auto dtIt = 
m_DefaultTiles.find( fmt ); + if ( dtIt == m_DefaultTiles.end() || !dtIt->second.initialized ) + return; + + auto poolIt = m_TilePools.find( fmt ); + if ( poolIt == m_TilePools.end() ) + return; + + D3D11_TILED_RESOURCE_COORDINATE coord = {}; + coord.Subresource = subresource; + coord.X = tileX; + coord.Y = tileY; + coord.Z = 0; + + D3D11_TILE_REGION_SIZE regionSize = {}; + regionSize.NumTiles = 1; + regionSize.bUseBox = FALSE; + + UINT poolOffset = dtIt->second.poolIndex; // always tile 0 + + // Map this tile to the shared default tile (many-to-one mapping is allowed) + m_Context2->UpdateTileMappings( + atlas.texture.Get(), + 1, &coord, ®ionSize, + poolIt->second.buffer.Get(), + 1, nullptr, &poolOffset, nullptr, + 0 + ); +} + +void D3D11StreamingResourcesManager::MapTileToPool( + const TiledAtlas& atlas, UINT subresource, + UINT tileX, UINT tileY, UINT poolTileIndex, + DXGI_FORMAT fmt ) { + + auto poolIt = m_TilePools.find( fmt ); + if ( poolIt == m_TilePools.end() ) + return; + + D3D11_TILED_RESOURCE_COORDINATE coord = {}; + coord.Subresource = subresource; + coord.X = tileX; + coord.Y = tileY; + coord.Z = 0; + + D3D11_TILE_REGION_SIZE regionSize = {}; + regionSize.NumTiles = 1; + regionSize.bUseBox = FALSE; + + m_Context2->UpdateTileMappings( + atlas.texture.Get(), + 1, &coord, ®ionSize, + poolIt->second.buffer.Get(), + 1, nullptr, &poolTileIndex, nullptr, + 0 + ); +} + +void D3D11StreamingResourcesManager::MapAllTilesToDefault( + const TiledAtlas& atlas, DXGI_FORMAT fmt ) { + + auto dtIt = m_DefaultTiles.find( fmt ); + if ( dtIt == m_DefaultTiles.end() || !dtIt->second.initialized ) + return; + auto poolIt = m_TilePools.find( fmt ); + if ( poolIt == m_TilePools.end() ) + return; + + // Use GetResourceTiling to discover the tile layout + UINT numTilesForResource = 0; + D3D11_PACKED_MIP_DESC packedMipDesc = {}; + D3D11_TILE_SHAPE tileShape = {}; + UINT numSubresourceTilings = atlas.mipLevels * atlas.arraySlices; + std::vector subresourceTilings( 
numSubresourceTilings ); + + m_Device2->GetResourceTiling( + atlas.texture.Get(), + &numTilesForResource, + &packedMipDesc, + &tileShape, + &numSubresourceTilings, + 0, + subresourceTilings.data() + ); + + // Map all tiles across all subresources to the default tile + UINT defaultPoolOffset = dtIt->second.poolIndex; + + for ( UINT sub = 0; sub < numSubresourceTilings; ++sub ) { + const auto& tiling = subresourceTilings[sub]; + if ( tiling.WidthInTiles == 0 || tiling.HeightInTiles == 0 ) + continue; + + UINT totalTilesInSub = tiling.WidthInTiles * tiling.HeightInTiles; + + // Map the entire subresource to the default tile using a single call + D3D11_TILED_RESOURCE_COORDINATE coord = {}; + coord.Subresource = sub; + coord.X = 0; + coord.Y = 0; + coord.Z = 0; + + D3D11_TILE_REGION_SIZE regionSize = {}; + regionSize.NumTiles = totalTilesInSub; + regionSize.bUseBox = TRUE; + regionSize.Width = tiling.WidthInTiles; + regionSize.Height = tiling.HeightInTiles; + regionSize.Depth = 1; + + // All tiles map to the same default pool tile (reuse mapping) + UINT rangeFlag = D3D11_TILE_RANGE_REUSE_SINGLE_TILE; + UINT rangeCount = totalTilesInSub; + + m_Context2->UpdateTileMappings( + atlas.texture.Get(), + 1, &coord, ®ionSize, + poolIt->second.buffer.Get(), + 1, &rangeFlag, &defaultPoolOffset, &rangeCount, + 0 + ); + + // Record tile states as Unmapped (pointing to default) + UINT mip = sub % atlas.mipLevels; + for ( UINT ty = 0; ty < tiling.HeightInTiles; ++ty ) { + for ( UINT tx = 0; tx < tiling.WidthInTiles; ++tx ) { + uint64_t key = MakeTileKey( fmt, sub, tx, ty ); + TileInfo& info = m_TileStates[key]; + info.state = TileState::Unmapped; + info.subresource = sub; + info.tileX = tx; + info.tileY = ty; + info.format = fmt; + info.poolTileIndex = 0; + info.lastUsedTime = 0.0f; + info.priority = 0.0f; + } + } + } + + // Handle packed mips (mips packed into shared tiles at the tail of the resource) + if ( packedMipDesc.NumPackedMips > 0 && packedMipDesc.NumTilesForPackedMips > 0 ) 
{ + // For packed mips, we map the packed tile region for each array slice. + // Each slice has NumTilesForPackedMips consecutive tiles starting at + // packedMipDesc.StartTileIndexInOverallResource (for slice 0). + for ( UINT slice = 0; slice < atlas.arraySlices; ++slice ) { + UINT startTile = packedMipDesc.StartTileIndexInOverallResource + + slice * packedMipDesc.NumTilesForPackedMips; + + // Use NULL coordinates + NULL region to map by absolute tile index + UINT rangeFlag = D3D11_TILE_RANGE_REUSE_SINGLE_TILE; + UINT rangeCount = packedMipDesc.NumTilesForPackedMips; + + m_Context2->UpdateTileMappings( + atlas.texture.Get(), + 1, nullptr, nullptr, // NULL = map by start tile offset + poolIt->second.buffer.Get(), + 1, &rangeFlag, &defaultPoolOffset, &rangeCount, + 0 + ); + } + } + + LogInfo() << "[StreamingResources] Mapped all " << numTilesForResource + << " tiles to default for format " << static_cast( fmt ); +} + +// ============================================================================= +// Subresource Tile Layout +// ============================================================================= + +void D3D11StreamingResourcesManager::GetSubresourceTileCount( + const TiledAtlas& atlas, UINT mipLevel, + UINT& tilesX, UINT& tilesY ) const { + + UINT numSubresourceTilings = atlas.mipLevels * atlas.arraySlices; + std::vector tilings( numSubresourceTilings ); + UINT totalTiles = 0; + D3D11_PACKED_MIP_DESC packedDesc = {}; + D3D11_TILE_SHAPE tileShape = {}; + + m_Device2->GetResourceTiling( + atlas.texture.Get(), + &totalTiles, &packedDesc, &tileShape, + &numSubresourceTilings, 0, tilings.data() + ); + + if ( mipLevel < numSubresourceTilings ) { + tilesX = tilings[mipLevel].WidthInTiles; + tilesY = tilings[mipLevel].HeightInTiles; + } else { + tilesX = 0; + tilesY = 0; + } +} + +// ============================================================================= +// Atlas Creation +// ============================================================================= + 
+TextureManager::AtlasResult D3D11StreamingResourcesManager::CreateStreamingAtlasArray( + std::basic_string_view sourceTextures, + UINT atlasSize, UINT mipLevels ) { + + if ( sourceTextures.empty() || !m_Initialized ) + return {}; + + TextureManager::AtlasResult result; + result.descriptors.resize( sourceTextures.size() ); + + // --- 1. Get format from first texture --- + D3D11_TEXTURE2D_DESC firstDesc; + sourceTextures[0]->GetDesc( &firstDesc ); + DXGI_FORMAT atlasFormat = firstDesc.Format; + + // --- 2. Run the same shelf-packing algorithm as TextureManager --- + const UINT blockSize = []( DXGI_FORMAT fmt ) -> UINT { + switch ( fmt ) { + case DXGI_FORMAT_BC1_UNORM: case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC2_UNORM: case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_UNORM: case DXGI_FORMAT_BC3_UNORM_SRGB: + return 4; + default: return 1; + } + }( atlasFormat ); + + const UINT MipAlignment = blockSize * ( 1 << ( mipLevels - 1 ) ); + + struct PackItem { + int originalIndex; + UINT width, height; + UINT x, y, slice; + D3D11_TEXTURE2D_DESC desc; + }; + + std::vector items; + items.reserve( sourceTextures.size() ); + + for ( size_t i = 0; i < sourceTextures.size(); ++i ) { + D3D11_TEXTURE2D_DESC desc; + sourceTextures[i]->GetDesc( &desc ); + items.push_back( { static_cast( i ), desc.Width, desc.Height, 0, 0, 0, desc } ); + } + + // Sort by height descending for shelf packing + std::sort( items.begin(), items.end(), []( const PackItem& a, const PackItem& b ) { + return a.height > b.height; + } ); + + auto Align = []( UINT value, UINT alignment ) -> UINT { + return ( value + alignment - 1 ) & ~( alignment - 1 ); + }; + + // Shelf packing + UINT currentX = 0, currentY = 0, currentShelfHeight = 0, currentSlice = 0; + for ( auto& item : items ) { + UINT alignedW = Align( item.width, MipAlignment ); + UINT alignedH = Align( item.height, MipAlignment ); + + if ( currentX + alignedW > atlasSize ) { + currentX = 0; + currentY += Align( currentShelfHeight, 
MipAlignment ); + currentShelfHeight = 0; + } + if ( currentY + alignedH > atlasSize ) { + currentSlice++; + currentX = 0; + currentY = 0; + currentShelfHeight = 0; + } + + item.x = currentX; + item.y = currentY; + item.slice = currentSlice; + + currentX += alignedW; + currentShelfHeight = std::max( currentShelfHeight, alignedH ); + } + + UINT totalSlices = currentSlice + 1; + + // --- 2b. Clamp mip levels for tiled resource array constraints --- + // On Tier 2 (and Tier 1): when ArraySize > 1, every mip must have dimensions + // >= the standard tile extent. Sub-tile mips ("packed mips") are NOT supported + // for texture arrays. Determine tile dimensions and limit mip count. + if ( totalSlices > 1 ) { + UINT tileW = 128, tileH = 128; // conservative default for 32bpp + switch ( atlasFormat ) { + case DXGI_FORMAT_BC1_UNORM: case DXGI_FORMAT_BC1_UNORM_SRGB: + tileW = 512; tileH = 256; break; // 0.5 bytes/texel + case DXGI_FORMAT_BC2_UNORM: case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_UNORM: case DXGI_FORMAT_BC3_UNORM_SRGB: + tileW = 256; tileH = 256; break; // 1 byte/texel + case DXGI_FORMAT_R8G8B8A8_UNORM: case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8A8_UNORM: case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + tileW = 128; tileH = 128; break; // 4 bytes/texel + default: break; + } + + // Count how many mip levels fit without going below tile extents + UINT maxMips = 0; + for ( UINT m = 0; m < mipLevels; ++m ) { + UINT mipW = std::max( 1u, atlasSize >> m ); + UINT mipH = std::max( 1u, atlasSize >> m ); + if ( mipW < tileW || mipH < tileH ) + break; + maxMips = m + 1; + } + maxMips = std::max( maxMips, 1 ); // at least 1 mip + + if ( maxMips < mipLevels ) { + LogInfo() << "[StreamingResources] Clamping mip levels from " << mipLevels + << " to " << maxMips << " for array size " << totalSlices + << " (tile extent " << tileW << "x" << tileH + << ", format " << static_cast( atlasFormat ) << ")"; + mipLevels = maxMips; + } + } + + // --- 3. 
Create the tiled Texture2DArray --- + D3D11_TEXTURE2D_DESC arrayDesc = {}; + arrayDesc.Width = atlasSize; + arrayDesc.Height = atlasSize; + arrayDesc.MipLevels = mipLevels; + arrayDesc.ArraySize = totalSlices; + arrayDesc.Format = atlasFormat; + arrayDesc.SampleDesc.Count = 1; + arrayDesc.SampleDesc.Quality = 0; + arrayDesc.Usage = D3D11_USAGE_DEFAULT; + arrayDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + arrayDesc.MiscFlags = D3D11_RESOURCE_MISC_TILED; + + ComPtr tiledTexture; + HRESULT hr = m_Device2->CreateTexture2D( &arrayDesc, nullptr, tiledTexture.GetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[StreamingResources] Failed to create tiled Texture2DArray (hr=" << hr << ")"; + return {}; + } + + // --- 4. Create tile pool --- + if ( !CreateTilePool( atlasFormat, INITIAL_POOL_TILES ) ) { + return {}; + } + + // --- 5. Store the atlas --- + TiledAtlas& atlas = m_TiledAtlases[atlasFormat]; + atlas.texture = tiledTexture; + atlas.atlasSize = atlasSize; + atlas.mipLevels = mipLevels; + atlas.arraySlices = totalSlices; + atlas.format = atlasFormat; + + // --- 6. Initialize default tile and map all to it --- + InitDefaultTile( atlasFormat ); + MapAllTilesToDefault( atlas, atlasFormat ); + + // --- 7. Create SRV --- + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = atlasFormat; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; + srvDesc.Texture2DArray.MostDetailedMip = 0; + srvDesc.Texture2DArray.MipLevels = mipLevels; + srvDesc.Texture2DArray.FirstArraySlice = 0; + srvDesc.Texture2DArray.ArraySize = totalSlices; + + ComPtr srv; + hr = m_Device2->CreateShaderResourceView( tiledTexture.Get(), &srvDesc, srv.GetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[StreamingResources] Failed to create SRV for tiled atlas (hr=" << hr << ")"; + return {}; + } + atlas.srv = srv; + + // --- 8. 
Store source texture info for streaming uploads --- + auto& sources = m_SourceTextures[atlasFormat]; + sources.clear(); + sources.reserve( items.size() ); + for ( const auto& item : items ) { + SourceTextureInfo si; + si.texture = sourceTextures[item.originalIndex]; + si.x = item.x; + si.y = item.y; + si.slice = item.slice; + si.width = item.width; + si.height = item.height; + si.sourceMipLevels = item.desc.MipLevels; + sources.push_back( std::move( si ) ); + } + + // --- 8b. Compute global source offsets for feedback texture indexing --- + // Each format's sources get a contiguous range in the flat feedback texture. + // This must be done after all atlas formats have been populated, but since + // CreateStreamingAtlasArray is called once per format, we recompute every time. + m_GlobalSourceOffsets.clear(); + m_TotalSourceCount = 0; + for ( const auto& [fmt, srcVec] : m_SourceTextures ) { + m_GlobalSourceOffsets[fmt] = m_TotalSourceCount; + m_TotalSourceCount += static_cast( srcVec.size() ); + } + + // --- 9. Write descriptors in original input order --- + for ( const auto& item : items ) { + TextureDescriptor& outDesc = result.descriptors[item.originalIndex]; + outDesc.slice = item.slice; + outDesc.uStart = static_cast( item.x ) / atlasSize; + outDesc.vStart = static_cast( item.y ) / atlasSize; + outDesc.uEnd = static_cast( item.x + item.width ) / atlasSize; + outDesc.vEnd = static_cast( item.y + item.height ) / atlasSize; + } + + // --- 10. Hand back raw pointers for AtlasResult (caller manages lifetime) --- + // Note: The tiled texture and SRV are owned by m_TiledAtlases; return raw ptrs + // that the AtlasResult can reference. We override Destroy() behavior by keeping + // our own refs. + result.atlasTextureArray = tiledTexture.Get(); + result.atlasSRV = srv.Get(); + + // AddRef so the raw pointers in AtlasResult remain valid + result.atlasTextureArray->AddRef(); + result.atlasSRV->AddRef(); + + // --- 11. 
Preload coarsest mip levels --- + PreloadCoarseMips( atlasFormat ); + + LogInfo() << "[StreamingResources] Created streaming atlas: " + << atlasSize << "x" << atlasSize << " x " << totalSlices << " slices, " + << mipLevels << " mips, format " << static_cast( atlasFormat ); + + return result; +} + +// ============================================================================= +// Preload Coarse Mips +// ============================================================================= + +void D3D11StreamingResourcesManager::PreloadCoarseMips( DXGI_FORMAT fmt ) { + auto atlasIt = m_TiledAtlases.find( fmt ); + if ( atlasIt == m_TiledAtlases.end() ) + return; + + auto srcIt = m_SourceTextures.find( fmt ); + if ( srcIt == m_SourceTextures.end() ) + return; + + const TiledAtlas& atlas = atlasIt->second; + const auto& sources = srcIt->second; + + // Query the tiling layout once to determine standard vs packed mip levels. + // Packed mips are below the minimum tile dimension and cannot have individual + // tiles mapped in a Texture2DArray — they are already covered by the default + // tile mapping established in MapAllTilesToDefault. + UINT numSubresourceTilings = atlas.mipLevels * atlas.arraySlices; + std::vector tilings( numSubresourceTilings ); + UINT totalTilesInResource = 0; + D3D11_PACKED_MIP_DESC packedDesc = {}; + D3D11_TILE_SHAPE tileShape = {}; + m_Device2->GetResourceTiling( + atlas.texture.Get(), + &totalTilesInResource, &packedDesc, &tileShape, + &numSubresourceTilings, 0, tilings.data() + ); + + // Standard mips: [0, standardMips) support per-tile mappings. + // Packed mips: [standardMips, mipLevels) share a single packed allocation. + UINT standardMips = atlas.mipLevels - packedDesc.NumPackedMips; + if ( standardMips == 0 ) { + LogInfo() << "[StreamingResources] All mips are packed for format " + << static_cast( fmt ) << " — nothing to preload"; + return; + } + + // Preload the PRELOADED_COARSE_MIPS coarsest *standard* mip levels only. 
+ // The original code iterated atlas.mipLevels - PRELOADED_COARSE_MIPS which, + // for a 2048px BC atlas (tile=256px), lands on mip 5 (64px) — a packed mip. + // GetSubresourceTileCount then returns (0,0) and every source is skipped, + // leaving all tiles on the default magenta tile with no real data ever loaded. + UINT preloadStart = ( standardMips > PRELOADED_COARSE_MIPS ) + ? standardMips - PRELOADED_COARSE_MIPS + : 0; + + UINT tilesUploaded = 0; + + for ( UINT mip = preloadStart; mip < standardMips; ++mip ) { + for ( UINT srcIdx = 0; srcIdx < static_cast( sources.size() ); ++srcIdx ) { + const auto& src = sources[srcIdx]; + + // Missing mips are handled by GenerateMissingMips below — skip here + if ( mip >= src.sourceMipLevels ) + continue; + + UINT dstSub = D3D11CalcSubresource( mip, src.slice, atlas.mipLevels ); + if ( dstSub >= numSubresourceTilings ) + continue; + + const auto& tiling = tilings[dstSub]; + if ( tiling.WidthInTiles == 0 || tiling.HeightInTiles == 0 ) + continue; // Packed or degenerate — skip + + // Compute which tiles in this subresource are touched by the source + // texture's region. Non-uniform sources may cover only a sub-rect of + // the atlas subresource, so we must not map tiles outside that rect. + UINT regionX = src.x >> mip; + UINT regionY = src.y >> mip; + UINT regionW = std::max( 1u, src.width >> mip ); + UINT regionH = std::max( 1u, src.height >> mip ); + + UINT tileW = tileShape.WidthInTexels; + UINT tileH = tileShape.HeightInTexels; + + UINT tileStartX = regionX / tileW; + UINT tileStartY = regionY / tileH; + UINT tileEndX = std::min( tiling.WidthInTiles - 1, ( regionX + regionW - 1 ) / tileW ); + UINT tileEndY = std::min( tiling.HeightInTiles - 1, ( regionY + regionH - 1 ) / tileH ); + + // Ensure all tiles covering this source region are mapped to real pool + // tiles. Mapping MUST happen before CopySubresourceRegion, otherwise + // the GPU silently discards writes to unmapped (default) tiles. 
+ for ( UINT ty = tileStartY; ty <= tileEndY; ++ty ) { + for ( UINT tx = tileStartX; tx <= tileEndX; ++tx ) { + uint64_t tileKey = MakeTileKey( fmt, dstSub, tx, ty ); + auto stateIt = m_TileStates.find( tileKey ); + if ( stateIt != m_TileStates.end() && + stateIt->second.state == TileState::Resident ) + continue; // Shared tile already resident from an earlier source + + UINT poolTile = AllocateTile( fmt ); + if ( poolTile == UINT_MAX ) + continue; // Pool exhausted — leave on default tile + + MapTileToPool( atlas, dstSub, tx, ty, poolTile, fmt ); + + TileInfo info; + info.state = TileState::Resident; + info.poolTileIndex = poolTile; + info.subresource = dstSub; + info.tileX = tx; + info.tileY = ty; + info.format = fmt; + info.lastUsedTime = 0.0f; + m_TileStates[tileKey] = info; + ++tilesUploaded; + } + } + + // Upload this source's pixel data. Always upload — even when all + // tiles were already Resident from an earlier source. Multiple + // sources share the same 64KB tile but occupy different (x,y) regions + // within it. Gating on "anyNewTiles" caused the second source's + // region to never be written, leaving it as uninitialized pool memory + // (black) or transparent (invisible alpha-tested geometry). + UploadTileData( atlas, dstSub, src, mip ); + + // Track as loaded so UpdateStreaming won't re-upload and eviction can + // properly invalidate this source-mip if the tile is reclaimed. + uint64_t srcKey = MakeSourceKey( fmt, srcIdx, mip ); + m_LoadedSources.insert( srcKey ); + } + } + + // Generate missing mip levels for sources that have fewer mips than the atlas. + // This is called once per source at atlas creation time; the generated data is + // uploaded immediately so coarse mips are always available. 
+ for ( UINT srcIdx = 0; srcIdx < static_cast( sources.size() ); ++srcIdx ) { + const auto& src = sources[srcIdx]; + if ( src.sourceMipLevels < atlas.mipLevels ) { + GenerateMissingMips( atlas, src, fmt, srcIdx, + tilings, numSubresourceTilings, tileShape ); + } + } + + LogInfo() << "[StreamingResources] Preloaded " << PRELOADED_COARSE_MIPS + << " coarsest standard mip(s) (" << tilesUploaded << " tiles) for format " + << static_cast( fmt ); +} + +// ============================================================================= +// Tile Data Upload +// ============================================================================= + +// Copies mip `srcMip` of `src.texture` into `subresource` of the tiled atlas, at the +// source's packed position (src.x, src.y) shifted right by `srcMip`. Returns without +// copying when the source texture has no such mip level. +void D3D11StreamingResourcesManager::UploadTileData( + const TiledAtlas& atlas, UINT subresource, + const SourceTextureInfo& src, UINT srcMip ) { + + if ( srcMip >= src.sourceMipLevels ) + return; // Source texture doesn't have this mip level + + UINT srcSub = D3D11CalcSubresource( srcMip, 0, src.sourceMipLevels ); + + // Destination offset within the atlas subresource at this mip level + UINT mipX = src.x >> srcMip; + UINT mipY = src.y >> srcMip; + + // Source region dimensions at this mip level. + // Using an explicit D3D11_BOX ensures non-uniform texture sizes are handled + // correctly — for packed atlas contents src.width/height may differ per entry, + // and a nullptr pSrcBox would copy the full source extent to an unintended region. + UINT mipW = std::max( 1u, src.width >> srcMip ); + UINT mipH = std::max( 1u, src.height >> srcMip ); + + D3D11_BOX srcBox = {}; + srcBox.left = 0; + srcBox.top = 0; + srcBox.front = 0; + srcBox.right = mipW; + srcBox.bottom = mipH; + srcBox.back = 1; + + // All tiles covering (mipX, mipY, mipW, mipH) must already be mapped to real + // pool tiles before this call — writes to unmapped tiles are silently discarded. 
+ m_Context->CopySubresourceRegion( + atlas.texture.Get(), subresource, + mipX, mipY, 0, + src.texture.Get(), srcSub, + &srcBox + ); +} + +// ============================================================================= +// Missing Mip Generation +// ============================================================================= + +// Fills atlas mips [src.sourceMipLevels, atlas.mipLevels) for one source: captures the +// source texture to CPU memory, box-filters its last mip down, re-compresses to `fmt` +// when that is a block-compressed format, maps the covered tiles and copies each level +// into the atlas. Every early return below leaves the missing mips unwritten. +void D3D11StreamingResourcesManager::GenerateMissingMips( + const TiledAtlas& atlas, const SourceTextureInfo& src, + DXGI_FORMAT fmt, UINT srcIndex, + const std::vector& tilings, + UINT numSubresourceTilings, + const D3D11_TILE_SHAPE& tileShape ) { + + if ( src.sourceMipLevels >= atlas.mipLevels ) + return; // No missing mips + + // Capture the source texture to CPU memory (creates an internal staging copy) + DirectX::ScratchImage captured; + if ( FAILED( DirectX::CaptureTexture( m_Device.Get(), m_Context.Get(), src.texture.Get(), captured ) ) ) { + LogWarn() << "[StreamingResources] CaptureTexture failed for source " << srcIndex; + return; + } + + // Grab the last available mip as the downsampling base + const DirectX::Image* lastMipImg = captured.GetImage( src.sourceMipLevels - 1, 0, 0 ); + if ( !lastMipImg ) return; + + // GenerateMipMaps requires uncompressed input — decompress BC textures first + DirectX::ScratchImage decompressed; + const DirectX::Image* baseImg = lastMipImg; + if ( DirectX::IsCompressed( lastMipImg->format ) ) { + if ( FAILED( DirectX::Decompress( *lastMipImg, DXGI_FORMAT_R8G8B8A8_UNORM, decompressed ) ) ) { + LogWarn() << "[StreamingResources] Decompress failed for source " << srcIndex; + return; + } + baseImg = decompressed.GetImage( 0, 0, 0 ); + } + + // Generate: level 0 = base (already copied to atlas), levels 1..N = the missing mips + UINT levelsToGen = atlas.mipLevels - src.sourceMipLevels + 1; + DirectX::ScratchImage mipChain; + if ( FAILED( DirectX::GenerateMipMaps( *baseImg, DirectX::TEX_FILTER_BOX, levelsToGen, mipChain ) ) ) { + LogWarn() << "[StreamingResources] GenerateMipMaps failed for source " << 
srcIndex; + return; + } + + // Re-compress the generated levels back to the atlas BC format. + // Try GPU-accelerated compression first; fall back to CPU if unsupported. + const DirectX::ScratchImage* finalChain = &mipChain; + DirectX::ScratchImage recompressed; + if ( DirectX::IsCompressed( fmt ) ) { + HRESULT hr = DirectX::Compress( m_Device.Get(), + mipChain.GetImages(), mipChain.GetImageCount(), mipChain.GetMetadata(), + fmt, DirectX::TEX_COMPRESS_DEFAULT, DirectX::TEX_ALPHA_WEIGHT_DEFAULT, + recompressed ); + if ( FAILED( hr ) ) { + // GPU BC compression not supported — use CPU path + recompressed = DirectX::ScratchImage{}; + if ( FAILED( DirectX::Compress( + mipChain.GetImages(), mipChain.GetImageCount(), mipChain.GetMetadata(), + fmt, DirectX::TEX_COMPRESS_DEFAULT, DirectX::TEX_ALPHA_WEIGHT_DEFAULT, + recompressed ) ) ) { + LogWarn() << "[StreamingResources] Compress failed for source " << srcIndex; + return; + } + } + finalChain = &recompressed; + } + + // Upload each generated mip level + for ( UINT mip = src.sourceMipLevels; mip < atlas.mipLevels; ++mip ) { + // chainIdx 0 = the base (already in atlas), so generated levels start at 1 + UINT chainIdx = mip - src.sourceMipLevels + 1; + const DirectX::Image* genImg = finalChain->GetImage( chainIdx, 0, 0 ); + if ( !genImg || !genImg->pixels ) continue; + + UINT dstSub = D3D11CalcSubresource( mip, src.slice, atlas.mipLevels ); + if ( dstSub >= numSubresourceTilings ) + continue; + + // Map tiles before uploading + const auto& tiling = tilings[dstSub]; + if ( tiling.WidthInTiles > 0 && tiling.HeightInTiles > 0 ) { + UINT regionX = src.x >> mip; + UINT regionY = src.y >> mip; + UINT regionW = std::max( 1u, src.width >> mip ); + UINT regionH = std::max( 1u, src.height >> mip ); + + UINT tw = tileShape.WidthInTexels; + UINT th = tileShape.HeightInTexels; + + UINT tileStartX = regionX / tw; + UINT tileStartY = regionY / th; + UINT tileEndX = std::min( tiling.WidthInTiles - 1, ( regionX + regionW - 1 ) / tw ); + 
UINT tileEndY = std::min( tiling.HeightInTiles - 1, ( regionY + regionH - 1 ) / th ); + + for ( UINT ty = tileStartY; ty <= tileEndY; ++ty ) { + for ( UINT tx = tileStartX; tx <= tileEndX; ++tx ) { + uint64_t tileKey = MakeTileKey( fmt, dstSub, tx, ty ); + auto stateIt = m_TileStates.find( tileKey ); + if ( stateIt != m_TileStates.end() && + stateIt->second.state == TileState::Resident ) + continue; + + UINT poolTile = AllocateTile( fmt ); + if ( poolTile == UINT_MAX ) + continue; // NOTE(review): this tile stays unmapped, yet the copy and the m_LoadedSources insert below still run for this mip + + MapTileToPool( atlas, dstSub, tx, ty, poolTile, fmt ); + + TileInfo info; + info.state = TileState::Resident; + info.poolTileIndex = poolTile; + info.subresource = dstSub; + info.tileX = tx; + info.tileY = ty; + info.format = fmt; + info.lastUsedTime = 0.0f; // NOTE(review): sourceKeys is left empty here, unlike the tiles created in UpdateStreaming — confirm intended + m_TileStates[tileKey] = info; + } + } + } + + // Create temporary immutable texture and copy to atlas + D3D11_TEXTURE2D_DESC tmpDesc = {}; + tmpDesc.Width = static_cast( genImg->width ); + tmpDesc.Height = static_cast( genImg->height ); + tmpDesc.MipLevels = 1; + tmpDesc.ArraySize = 1; + tmpDesc.Format = genImg->format; + tmpDesc.SampleDesc.Count = 1; + tmpDesc.Usage = D3D11_USAGE_IMMUTABLE; + tmpDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + D3D11_SUBRESOURCE_DATA initData = {}; + initData.pSysMem = genImg->pixels; + initData.SysMemPitch = static_cast( genImg->rowPitch ); + + ComPtr tmpTex; + if ( SUCCEEDED( m_Device->CreateTexture2D( &tmpDesc, &initData, tmpTex.GetAddressOf() ) ) ) { + UINT mipX = src.x >> mip; + UINT mipY = src.y >> mip; + D3D11_BOX box = { 0, 0, 0, + static_cast( genImg->width ), + static_cast( genImg->height ), 1 }; + m_Context->CopySubresourceRegion( + atlas.texture.Get(), dstSub, mipX, mipY, 0, + tmpTex.Get(), 0, &box ); + } + + // Mark as loaded (done even when CreateTexture2D above failed and nothing was copied) + uint64_t srcKey = MakeSourceKey( fmt, srcIndex, mip ); + m_LoadedSources.insert( srcKey ); + } + + LogInfo() << "[StreamingResources] Generated " << ( atlas.mipLevels - src.sourceMipLevels ) + << " missing mip(s) for source " << srcIndex + << " (had " << 
src.sourceMipLevels << ", atlas needs " << atlas.mipLevels << ")"; +} + +// ============================================================================= +// Per-Frame Streaming Update +// ============================================================================= + +void D3D11StreamingResourcesManager::UpdateStreaming( + const DirectX::XMFLOAT3& cameraPosition, float drawDistance, float currentTime, + const std::unordered_set* requestedSources ) { + + if ( !m_Initialized ) + return; + + // ------------------------------------------------------------------------- + // The streaming unit is a (source, mip) pair — NOT individual tiles. + // + // Multiple non-uniform source textures share 64KB atlas tiles. The old + // tile-level approach allocated a fresh pool tile per-source-per-tile, + // causing the same atlas tile to be re-mapped to different pool memory + // each frame (frame-flickering) and leaving newly-allocated tiles filled + // with uninitialized zeros (black). + // + // The correct flow is: + // 1. Decide which (source, mip) pairs need loading. + // 2. For each, ensure ALL atlas tiles that the source covers are mapped + // to real pool tiles (allocate only if currently default-mapped). + // 3. Upload the source mip data via CopySubresourceRegion ONCE. + // 4. Record the (source, mip) as loaded so we never re-upload it. + // ------------------------------------------------------------------------- + + // --- 1. Build fresh load queue --- + // Clear any stale requests from previous frames. The priority_queue has no + // clear(), so swap with an empty one. 
+ { decltype( m_LoadQueue ) empty; m_LoadQueue.swap( empty ); } + + for ( auto& [fmt, sources] : m_SourceTextures ) { + auto atlasIt = m_TiledAtlases.find( fmt ); + if ( atlasIt == m_TiledAtlases.end() ) + continue; + + const TiledAtlas& atlas = atlasIt->second; + + // Query tile layout once per atlas + UINT numSubresourceTilings = atlas.mipLevels * atlas.arraySlices; + D3D11_PACKED_MIP_DESC packedDesc = {}; + D3D11_TILE_SHAPE tileShape = {}; + UINT totalTiles = 0; + { + std::vector tmp( numSubresourceTilings ); + m_Device2->GetResourceTiling( + atlas.texture.Get(), + &totalTiles, &packedDesc, &tileShape, + &numSubresourceTilings, 0, tmp.data() + ); + } + + UINT standardMips = atlas.mipLevels - packedDesc.NumPackedMips; + + // Determine the preloaded mip range so we skip those + UINT preloadStart = ( standardMips > PRELOADED_COARSE_MIPS ) + ? standardMips - PRELOADED_COARSE_MIPS + : 0; + + for ( UINT srcIdx = 0; srcIdx < static_cast( sources.size() ); ++srcIdx ) { + const auto& src = sources[srcIdx]; + + // Feedback-driven filtering: if requestedSources is provided, only stream + // sources that the GPU reported as needing data. Otherwise load everything. + if ( requestedSources ) { + UINT globalIdx = m_GlobalSourceOffsets[fmt] + srcIdx; + if ( requestedSources->count( globalIdx ) == 0 ) + continue; // Not requested by GPU feedback — skip + } + + for ( UINT mip = 0; mip < standardMips; ++mip ) { + // Skip mips that were preloaded at atlas creation + if ( mip >= preloadStart ) + continue; + + // Skip if the source doesn't have this mip — generated mips + // are handled by GenerateMissingMips during PreloadCoarseMips. + // For mips that were already generated, m_LoadedSources will + // short-circuit below. 
+ if ( mip >= src.sourceMipLevels ) + continue; + + // Skip if already uploaded + uint64_t srcKey = MakeSourceKey( fmt, srcIdx, mip ); + if ( m_LoadedSources.count( srcKey ) ) + continue; + + // Priority: coarser mips first (higher priority number) + float priority = static_cast( atlas.mipLevels - mip ); + + StreamingRequest req; + req.sourceIndex = srcIdx; + req.priority = priority; + req.mipLevel = mip; + req.format = fmt; + m_LoadQueue.push( req ); + } + } + } + + // --- 2. Process load queue (no per-frame cap) --- + // All visible source-mips are loaded immediately. A per-frame budget previously + // caused multi-frame pop-in and invisible alpha-tested geometry (BC2/BC3 default + // tiles are transparent). The cost is bounded by the number of newly-visible + // sources, which is typically small after the initial load. + while ( !m_LoadQueue.empty() ) { + StreamingRequest req = m_LoadQueue.top(); + m_LoadQueue.pop(); + + // Double-check: may have been loaded by a higher-priority path + uint64_t srcKey = MakeSourceKey( req.format, req.sourceIndex, req.mipLevel ); + if ( m_LoadedSources.count( srcKey ) ) + continue; + + auto atlasIt = m_TiledAtlases.find( req.format ); + if ( atlasIt == m_TiledAtlases.end() ) + continue; + + auto srcIt = m_SourceTextures.find( req.format ); + if ( srcIt == m_SourceTextures.end() || req.sourceIndex >= srcIt->second.size() ) + continue; + + const TiledAtlas& atlas = atlasIt->second; + const SourceTextureInfo& src = srcIt->second[req.sourceIndex]; + + UINT dstSub = D3D11CalcSubresource( req.mipLevel, src.slice, atlas.mipLevels ); + + // Query tiling for tile shape + UINT numSubresourceTilings = atlas.mipLevels * atlas.arraySlices; + std::vector tilings( numSubresourceTilings ); + UINT totalTiles = 0; + D3D11_PACKED_MIP_DESC packedDesc = {}; + D3D11_TILE_SHAPE tileShape = {}; + m_Device2->GetResourceTiling( + atlas.texture.Get(), + &totalTiles, &packedDesc, &tileShape, + &numSubresourceTilings, 0, tilings.data() + ); + + if ( dstSub 
>= numSubresourceTilings ) + continue; + + const auto& tiling = tilings[dstSub]; + if ( tiling.WidthInTiles == 0 || tiling.HeightInTiles == 0 ) + continue; + + // Compute tile range this source covers at this mip + UINT regionX = src.x >> req.mipLevel; + UINT regionY = src.y >> req.mipLevel; + UINT regionW = std::max( 1u, src.width >> req.mipLevel ); + UINT regionH = std::max( 1u, src.height >> req.mipLevel ); + UINT tileW = tileShape.WidthInTexels; + UINT tileH = tileShape.HeightInTexels; + + UINT tileStartX = regionX / tileW; + UINT tileStartY = regionY / tileH; + UINT tileEndX = std::min( tiling.WidthInTiles - 1, ( regionX + regionW - 1 ) / tileW ); + UINT tileEndY = std::min( tiling.HeightInTiles - 1, ( regionY + regionH - 1 ) / tileH ); + + // Ensure all covered tiles are mapped to real pool tiles. + // Tiles may already be resident from another source that shares them — skip those. + for ( UINT ty = tileStartY; ty <= tileEndY; ++ty ) { + for ( UINT tx = tileStartX; tx <= tileEndX; ++tx ) { + uint64_t tileKey = MakeTileKey( req.format, dstSub, tx, ty ); + auto stateIt = m_TileStates.find( tileKey ); + if ( stateIt != m_TileStates.end() && + stateIt->second.state == TileState::Resident ) { + // Tile already resident — still record our source key so eviction + // of this shared tile invalidates all dependent sources. 
+ stateIt->second.sourceKeys.push_back( srcKey ); + continue; + } + + UINT poolTile = AllocateTile( req.format ); + if ( poolTile == UINT_MAX ) + continue; // Pool exhausted — leave on default + + MapTileToPool( atlas, dstSub, tx, ty, poolTile, req.format ); + + TileInfo info; + info.state = TileState::Resident; + info.poolTileIndex = poolTile; + info.subresource = dstSub; + info.tileX = tx; + info.tileY = ty; + info.format = req.format; + info.lastUsedTime = currentTime; + info.sourceKeys.push_back( srcKey ); + m_TileStates[tileKey] = info; + } + } + + // Upload source data once (all tiles are now mapped) + UploadTileData( atlas, dstSub, src, req.mipLevel ); + + m_LoadedSources.insert( srcKey ); + + // Touch all tiles this source covers so they won't be unloaded + for ( UINT ty = tileStartY; ty <= tileEndY; ++ty ) { + for ( UINT tx = tileStartX; tx <= tileEndX; ++tx ) { + uint64_t tileKey = MakeTileKey( req.format, dstSub, tx, ty ); + auto stateIt = m_TileStates.find( tileKey ); + if ( stateIt != m_TileStates.end() ) { + stateIt->second.lastUsedTime = currentTime; + } + } + } + + } + + // --- 3. Touch resident tiles based on feedback --- + // When feedback-driven: only touch tiles belonging to requested sources. + // When no feedback (requestedSources == nullptr): touch everything (legacy behavior). 
+ if ( requestedSources ) { + // Build a set of (format, srcIdx) pairs that are requested + for ( auto& [fmt, sources] : m_SourceTextures ) { + auto atlasIt = m_TiledAtlases.find( fmt ); + if ( atlasIt == m_TiledAtlases.end() ) + continue; + + const TiledAtlas& atlas = atlasIt->second; + + for ( UINT srcIdx = 0; srcIdx < static_cast( sources.size() ); ++srcIdx ) { + UINT globalIdx = m_GlobalSourceOffsets[fmt] + srcIdx; + if ( requestedSources->count( globalIdx ) == 0 ) + continue; // Not visible — don't touch, let it age out + + const auto& src = sources[srcIdx]; + + // Touch all tiles this source covers across all loaded mips + UINT numSub = atlas.mipLevels * atlas.arraySlices; + std::vector tilings( numSub ); + UINT tt = 0; + D3D11_PACKED_MIP_DESC pd = {}; + D3D11_TILE_SHAPE ts = {}; + m_Device2->GetResourceTiling( + atlas.texture.Get(), + &tt, &pd, &ts, &numSub, 0, tilings.data() ); + + UINT standardMips = atlas.mipLevels - pd.NumPackedMips; + for ( UINT mip = 0; mip < standardMips; ++mip ) { + UINT dstSub = D3D11CalcSubresource( mip, src.slice, atlas.mipLevels ); + if ( dstSub >= numSub ) continue; + + UINT regionX = src.x >> mip; + UINT regionY = src.y >> mip; + UINT regionW = std::max( 1u, src.width >> mip ); + UINT regionH = std::max( 1u, src.height >> mip ); + UINT tileW = ts.WidthInTexels; + UINT tileH = ts.HeightInTexels; + if ( tileW == 0 || tileH == 0 ) continue; + + UINT tileStartX = regionX / tileW; + UINT tileStartY = regionY / tileH; + UINT tileEndX = std::min( tilings[dstSub].WidthInTiles > 0 ? tilings[dstSub].WidthInTiles - 1 : 0, + ( regionX + regionW - 1 ) / tileW ); + UINT tileEndY = std::min( tilings[dstSub].HeightInTiles > 0 ? 
tilings[dstSub].HeightInTiles - 1 : 0, + ( regionY + regionH - 1 ) / tileH ); + + for ( UINT ty = tileStartY; ty <= tileEndY; ++ty ) { + for ( UINT tx = tileStartX; tx <= tileEndX; ++tx ) { + uint64_t tileKey = MakeTileKey( fmt, dstSub, tx, ty ); + auto stateIt = m_TileStates.find( tileKey ); + if ( stateIt != m_TileStates.end() && stateIt->second.state == TileState::Resident ) { + stateIt->second.lastUsedTime = currentTime; + } + } + } + } + } + } + } else { + // No feedback — touch everything (backward compatibility) + for ( auto& [key, tile] : m_TileStates ) { + if ( tile.state == TileState::Resident ) { + tile.lastUsedTime = currentTime; + } + } + } + + // --- 4. Identify unload candidates --- + // (Currently all resident tiles are touched every frame above, so nothing + // will be unloaded. This section is kept for future distance-based eviction + // where step 3 would only touch tiles near the camera.) + m_UnloadCandidates.clear(); + for ( auto& [key, tile] : m_TileStates ) { + if ( tile.state == TileState::Resident && + ( currentTime - tile.lastUsedTime ) > UNLOAD_COOLDOWN_SECONDS ) { + + // Don't unload preloaded coarse mips + auto atlasIt = m_TiledAtlases.find( tile.format ); + if ( atlasIt != m_TiledAtlases.end() ) { + UINT mipLevels = atlasIt->second.mipLevels; + UINT mip = tile.subresource % mipLevels; + + // Query tile layout once per atlas + UINT numSubresourceTilings = atlasIt->second.mipLevels * atlasIt->second.arraySlices; + D3D11_PACKED_MIP_DESC packedDesc = {}; + D3D11_TILE_SHAPE tileShape = {}; + UINT totalTiles = 0; + { + std::vector tmp( numSubresourceTilings ); + m_Device2->GetResourceTiling( + atlasIt->second.texture.Get(), + &totalTiles, &packedDesc, &tileShape, + &numSubresourceTilings, 0, tmp.data() + ); + } + + UINT standardMips = mipLevels - packedDesc.NumPackedMips; + UINT preloadStart = ( standardMips > PRELOADED_COARSE_MIPS ) + ? 
standardMips - PRELOADED_COARSE_MIPS : 0; + if ( mip >= preloadStart ) + continue; // Don't unload preloaded coarse mips + } + + tile.state = TileState::PendingUnload; + m_UnloadCandidates.push_back( key ); + } + } + + // --- 5. Process unload queue (frame-budgeted) --- + UINT unmapsThisFrame = 0; + for ( auto key : m_UnloadCandidates ) { + if ( unmapsThisFrame >= MAX_TILE_UNMAPS_PER_FRAME ) + break; + + auto stateIt = m_TileStates.find( key ); + if ( stateIt == m_TileStates.end() ) + continue; + + TileInfo& tile = stateIt->second; + if ( tile.state != TileState::PendingUnload ) + continue; + + auto atlasIt = m_TiledAtlases.find( tile.format ); + if ( atlasIt == m_TiledAtlases.end() ) + continue; + + // Remap to default tile + MapTileToDefault( atlasIt->second, tile.subresource, + tile.tileX, tile.tileY, tile.format ); + + // Free the pool tile + FreeTile( tile.format, tile.poolTileIndex ); + + tile.state = TileState::Unmapped; + tile.poolTileIndex = 0; + unmapsThisFrame++; + + // Remove m_LoadedSources entries that depended on this tile so the + // source will be re-uploaded if it becomes visible again. + for ( uint64_t sk : tile.sourceKeys ) { + m_LoadedSources.erase( sk ); + } + tile.sourceKeys.clear(); + } +} diff --git a/D3D11Engine/D3D11StreamingResourcesManager.h b/D3D11Engine/D3D11StreamingResourcesManager.h new file mode 100644 index 00000000..f307011d --- /dev/null +++ b/D3D11Engine/D3D11StreamingResourcesManager.h @@ -0,0 +1,261 @@ +#pragma once + +#include "pch.h" +#include +#include +#include +#include +#include +#include +#include "D3D11TextureAtlasManager.h" + +// Forward declarations +class zCTexture; + +/** + * Streaming resources manager using D3D11 Tiled Resources (Reserved Resources). + * + * Creates Texture2DArray atlases backed by tile pools instead of fully-committed + * GPU memory. Tiles are streamed in/out based on camera proximity and screen-space + * priority, keeping memory footprint bounded even with large texture sets. 
+ * + * All unmapped tiles point to a single default tile (magenta debug fill), so the + * SRV is always valid and shaders never sample garbage data. + * + * Coarsest mip levels are preloaded immediately so objects always display at least + * a low-resolution texture. + */ +class D3D11StreamingResourcesManager { +public: + D3D11StreamingResourcesManager() = default; + ~D3D11StreamingResourcesManager(); + + // Public struct: source texture info (needed by graphics engine for feedback lookup) + struct SourceTextureInfo { + Microsoft::WRL::ComPtr texture; + UINT x, y, slice; + UINT width, height; + UINT sourceMipLevels; + }; + + // --- Capability query (static, can be called before Init) --- + static bool GetIsStreamingSupported( ID3D11Device1* device ); + + // --- Lifecycle --- + bool Init( ID3D11Device1* device, ID3D11DeviceContext1* context ); + void Shutdown(); + + // --- Atlas creation --- + // Creates a tiled Texture2DArray with the same bin-packing layout as + // TextureManager::CreateAtlasArray, but backed by tile pools. + // Returns a compatible AtlasResult so the rest of the pipeline is unchanged. + TextureManager::AtlasResult CreateStreamingAtlasArray( + std::basic_string_view sourceTextures, + UINT atlasSize = 2048, UINT mipLevels = 6 ); + + // --- Per-frame streaming update --- + // Called once per frame before draw calls. Queues every not-yet-loaded + // (source, mip) pair and uploads all of them in the same call (no per-frame + // load cap); tile unmaps are limited to MAX_TILE_UNMAPS_PER_FRAME per call. + // If requestedSources is non-null, only sources in the set are loaded (feedback-driven). + // If null, all sources are loaded (backward compatibility / non-streaming fallback). + void UpdateStreaming( const DirectX::XMFLOAT3& cameraPosition, float drawDistance, float currentTime, + const std::unordered_set* requestedSources = nullptr ); + + // --- Feedback query methods --- + // Global source offset for a given format (cumulative count of sources in prior formats). 
+ UINT GetGlobalSourceOffset( DXGI_FORMAT fmt ) const; + // Total number of sources across all formats. + UINT GetTotalSourceCount() const; + + // Source texture list for a given format (for populating SubmeshGPUData.globalSourceIndex). + const std::vector& GetSourceTextures( DXGI_FORMAT fmt ) const; + + // --- World lifecycle --- + void OnWorldUnloaded(); + +private: + // --- Device interfaces (QueryInterface'd from ID3D11Device1/Context1) --- + Microsoft::WRL::ComPtr m_Device2; + Microsoft::WRL::ComPtr m_Context2; + + // Keep a ref to ID3D11Device for non-tiled operations (staging textures etc.) + Microsoft::WRL::ComPtr m_Device; + Microsoft::WRL::ComPtr m_Context; + + bool m_Initialized = false; + + // ========================================================================= + // Tile Pool + // ========================================================================= + // One tile pool per DXGI_FORMAT group (mirrors the atlas grouping). + struct TilePool { + Microsoft::WRL::ComPtr buffer; // D3D11_RESOURCE_MISC_TILE_POOL + UINT totalTiles = 0; // capacity in 64KB tiles + UINT usedTiles = 0; // high-water allocation mark + std::vector freeTiles; // recycled tile indices (LIFO stack) + }; + std::unordered_map m_TilePools; + + // ========================================================================= + // Tiled Atlas Textures + // ========================================================================= + struct TiledAtlas { + Microsoft::WRL::ComPtr texture; // D3D11_RESOURCE_MISC_TILED + Microsoft::WRL::ComPtr srv; + UINT atlasSize = 0; + UINT mipLevels = 0; + UINT arraySlices = 0; + DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; + }; + std::unordered_map m_TiledAtlases; + + // ========================================================================= + // Default (null) Tile + // ========================================================================= + // A single 64KB tile filled with magenta (1,0,1,1), mapped to all unmapped + // tile regions so shaders always 
read valid data. + struct DefaultTile { + UINT poolIndex = 0; + bool initialized = false; + }; + std::unordered_map m_DefaultTiles; + + // ========================================================================= + // Tile State Tracking + // ========================================================================= + enum class TileState : uint8_t { + Unmapped, // mapped to default magenta tile + PendingLoad, // queued for streaming in + Resident, // fully loaded with real data + PendingUnload // cooldown timer running before unmap + }; + + struct TileInfo { + UINT poolTileIndex = 0; + TileState state = TileState::Unmapped; + float lastUsedTime = 0.0f; + float priority = 0.0f; + UINT subresource = 0; + UINT tileX = 0; + UINT tileY = 0; + DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; + // Source-mip keys that depend on this tile. When evicted, these are + // erased from m_LoadedSources so re-streaming can happen. + std::vector sourceKeys; + }; + // key = MakeTileKey(format, subresource, tileX, tileY) + std::unordered_map m_TileStates; + + // ========================================================================= + // Streaming Request Queue + // ========================================================================= + // Streaming operates at the source+mip granularity, NOT individual tiles. + // Multiple non-uniform sources may share the same atlas tile; tile-level + // streaming caused tiles to be re-mapped and overwritten each frame as + // different sources "claimed" the shared tile, producing frame-flickering + // and eventual black (uninitialized pool memory). 
+ struct StreamingRequest { + UINT sourceIndex = 0; // index into m_SourceTextures[format] + float priority = 0.0f; + UINT mipLevel = 0; + DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; + + bool operator<( const StreamingRequest& other ) const { + return priority < other.priority; // max-heap: highest priority first + } + }; + std::priority_queue m_LoadQueue; + std::vector m_UnloadCandidates; + + // Tracks which source+mip combos have been fully uploaded. + // Key = MakeSourceKey(format, sourceIndex, mip) + std::unordered_set m_LoadedSources; + + // ========================================================================= + // Source Texture References (for uploading tile data) + // ========================================================================= + // Maps (format, PackItem index) to the source texture so we can read tile + // data when streaming in. Populated during CreateStreamingAtlasArray. + // (SourceTextureInfo is declared in the public section above.) + // Key: format -> vector of source textures (matches atlas packing order) + std::unordered_map> m_SourceTextures; + + // ========================================================================= + // Global Source Offsets (for feedback texture indexing) + // ========================================================================= + // Maps each format to its cumulative offset in the flat global source array. + // Computed during CreateStreamingAtlasArray. 
+ std::unordered_map m_GlobalSourceOffsets; + UINT m_TotalSourceCount = 0; + + // ========================================================================= + // Staging Ring Buffer + // ========================================================================= + static constexpr UINT STAGING_RING_SIZE = 8; + struct StagingBuffer { + Microsoft::WRL::ComPtr texture; + bool inUse = false; + }; + std::array m_StagingRing{}; + UINT m_StagingRingHead = 0; + + // ========================================================================= + // Budget & Tuning Constants + // ========================================================================= + static constexpr UINT TILE_SIZE_BYTES = 65536; // 64KB per tile + static constexpr UINT MAX_TILE_UPLOADS_PER_FRAME = 8; // NOTE(review): UpdateStreaming in the .cpp drains its load queue without a per-frame cap and does not read this constant + static constexpr UINT MAX_TILE_UNMAPS_PER_FRAME = 4; // upper bound on tiles unmapped by one UpdateStreaming call + static constexpr float UNLOAD_COOLDOWN_SECONDS = 5.0f; // a resident tile becomes an unload candidate once untouched for longer than this + static constexpr UINT INITIAL_POOL_TILES = 512; // 32 MB per pool + static constexpr UINT PRELOADED_COARSE_MIPS = 1; // number of coarsest standard mips treated as preloaded at creation (1 = only the coarsest, not all) + + // ========================================================================= + // Internal Helpers + // ========================================================================= + + // Tile pool management + UINT AllocateTile( DXGI_FORMAT fmt ); + void FreeTile( DXGI_FORMAT fmt, UINT tileIndex ); + bool CreateTilePool( DXGI_FORMAT fmt, UINT numTiles ); + void GrowTilePool( DXGI_FORMAT fmt, UINT additionalTiles ); + + // Default tile + void InitDefaultTile( DXGI_FORMAT fmt ); + void FillDefaultTileData( DXGI_FORMAT fmt, std::vector& outData ); + + // Tile mapping + void MapTileToDefault( const TiledAtlas& atlas, UINT subresource, + UINT tileX, UINT tileY, DXGI_FORMAT fmt ); + void MapTileToPool( const TiledAtlas& atlas, UINT subresource, + UINT tileX, UINT tileY, UINT poolTileIndex, + DXGI_FORMAT fmt ); + void MapAllTilesToDefault( const TiledAtlas& atlas, DXGI_FORMAT fmt ); + + // Upload one 
source texture's mip region into the tiled atlas. + // All tiles covering the source region must be mapped to pool tiles before calling. + // Uses a bounded D3D11_BOX so non-uniform texture sizes are handled correctly. + void UploadTileData( const TiledAtlas& atlas, UINT subresource, + const SourceTextureInfo& src, UINT srcMip ); + + // Generate missing mip levels for a source texture that has fewer mips than + // the atlas requires. Uses DirectXTex to capture the source's last mip, + // generate a proper box-filtered mip chain, re-compress to the atlas format, + // and upload each generated level. Tiles are mapped before upload. + void GenerateMissingMips( const TiledAtlas& atlas, const SourceTextureInfo& src, + DXGI_FORMAT fmt, UINT srcIndex, + const std::vector& tilings, + UINT numSubresourceTilings, + const D3D11_TILE_SHAPE& tileShape ); + + // Key generation + static uint64_t MakeTileKey( DXGI_FORMAT fmt, UINT subresource, + UINT tileX, UINT tileY ); + static uint64_t MakeSourceKey( DXGI_FORMAT fmt, UINT sourceIndex, UINT mip ); + + // Preload coarsest mip levels (called from CreateStreamingAtlasArray) + void PreloadCoarseMips( DXGI_FORMAT fmt ); + + // Get number of tiles in a subresource dimension + void GetSubresourceTileCount( const TiledAtlas& atlas, UINT mipLevel, + UINT& tilesX, UINT& tilesY ) const; +}; diff --git a/D3D11Engine/GothicGraphicsState.h b/D3D11Engine/GothicGraphicsState.h index 6e23c260..116cc688 100644 --- a/D3D11Engine/GothicGraphicsState.h +++ b/D3D11Engine/GothicGraphicsState.h @@ -641,6 +641,7 @@ struct GothicRendererSettings { WireframeWorld = false; DrawShadowGeometry = true; UseIndirectVobShadows = false; + EnableStreamingResources = false; FixViewFrustum = false; DisableWatermark = true; DisableRendering = false; @@ -830,6 +831,7 @@ struct GothicRendererSettings { E_ShadowFrustumCullingMode; bool DrawShadowGeometry; bool UseIndirectVobShadows; + bool EnableStreamingResources; bool VegetationAlphaToCoverage; 
bool DisableWatermark; bool DisableRendering; @@ -970,6 +972,7 @@ struct GothicRendererSettings { bool UseLayeredRendering; bool UseShadowAtlas; bool ForceFeatureLevel10; + bool StreamingResourcesSupported; } FeatureSet; } DebugSettings; }; diff --git a/D3D11Engine/Shaders/CS_CullVobs.hlsl b/D3D11Engine/Shaders/CS_CullVobs.hlsl index 929a9025..1ce3bbde 100644 --- a/D3D11Engine/Shaders/CS_CullVobs.hlsl +++ b/D3D11Engine/Shaders/CS_CullVobs.hlsl @@ -13,7 +13,7 @@ cbuffer CullCB : register( b0 ) float globalWindStrength; uint windAdvanced; uint numVobs; - uint pad; + uint feedbackFrameNumber; }; struct VobGPUData @@ -41,7 +41,7 @@ struct SubmeshGPUData float vEnd; uint argIndex; uint instanceBaseOffset; - uint pad; + uint globalSourceIndex; }; struct VobInstanceInfoAtlas @@ -56,6 +56,7 @@ struct VobInstanceInfoAtlas float vStart; float uEnd; float vEnd; + uint globalSourceIndex; }; StructuredBuffer VobBuffer : register( t0 ); @@ -63,6 +64,11 @@ StructuredBuffer SubmeshBuffer : register( t1 ); RWStructuredBuffer InstanceOutput : register( u0 ); RWByteAddressBuffer IndirectArgsUAV : register( u1 ); +// GPU feedback for streaming: source-indexed RWTexture2D +// The CS stamps visible sources once per (vob, submesh) — orders of magnitude +// cheaper than per-pixel atomics in the pixel shader. +RWTexture2D FeedbackUAV : register( u5 ); + [numthreads( 64, 1, 1 )] void CSMain( uint3 DTid : SV_DispatchThreadID ) { @@ -124,7 +130,15 @@ void CSMain( uint3 DTid : SV_DispatchThreadID ) inst.vStart = sm.vStart; inst.uEnd = sm.uEnd; inst.vEnd = sm.vEnd; + inst.globalSourceIndex = sm.globalSourceIndex; InstanceOutput[sm.instanceBaseOffset + slot] = inst; + + // Stamp feedback: one atomic per visible (vob, submesh) pair. + // Far cheaper than per-pixel atomics in the PS. 
+ if ( feedbackFrameNumber > 0 ) + { + InterlockedMax( FeedbackUAV[uint2( sm.globalSourceIndex, 0 )], feedbackFrameNumber ); + } } } diff --git a/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl b/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl index 3f365e91..bd4349ef 100644 --- a/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl +++ b/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl @@ -32,6 +32,7 @@ struct VobInstanceInfoAtlas { float vStart; float uEnd; float vEnd; + uint globalSourceIndex; }; StructuredBuffer instances : register(t1); From b7ba63c19848aec8544ec79475c59f84bf5555a9 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Fri, 6 Mar 2026 16:14:27 +0100 Subject: [PATCH 04/42] remove streaming from engine for now, too expensive in the current form. profiled some more reduced CPU overhead, implement Hi-Z Culling for indirect VOB drawing --- D3D11Engine/ConstantBufferStructs.h | 12 + D3D11Engine/D3D11GraphicsEngine.cpp | 393 +++++++++++++-------------- D3D11Engine/D3D11GraphicsEngine.h | 27 +- D3D11Engine/D3D11ShaderManager.cpp | 4 +- D3D11Engine/GothicAPI.cpp | 82 +++--- D3D11Engine/GothicGraphicsState.h | 2 - D3D11Engine/ShaderIDs.h | 1 + D3D11Engine/Shaders/CS_BuildHiZ.hlsl | 57 ++++ D3D11Engine/Shaders/CS_CullVobs.hlsl | 106 ++++++++ D3D11Engine/Toolbox.cpp | 19 ++ D3D11Engine/Toolbox.h | 5 + 11 files changed, 449 insertions(+), 259 deletions(-) create mode 100644 D3D11Engine/Shaders/CS_BuildHiZ.hlsl diff --git a/D3D11Engine/ConstantBufferStructs.h b/D3D11Engine/ConstantBufferStructs.h index 8fbc5655..b7167686 100644 --- a/D3D11Engine/ConstantBufferStructs.h +++ b/D3D11Engine/ConstantBufferStructs.h @@ -82,6 +82,18 @@ struct CullConstants { UINT windAdvanced; UINT numVobs; UINT feedbackFrameNumber; // >0 = write feedback in CS; 0 = disabled (e.g. 
shadow pass) + UINT enableHiZ; // 1 = Hi-Z occlusion culling enabled + UINT hiZMipCount; + float hiZWidth; // Hi-Z mip 0 dimensions (full depth buffer size) + float hiZHeight; + XMFLOAT4X4 viewProjection; // Current frame view-projection matrix for Hi-Z reprojection +}; + +struct HiZBuildConstants { + UINT outputWidth; + UINT outputHeight; + UINT inputMipLevel; + UINT isCopyPass; // 1 = copy from depth buffer (mip 0), 0 = downsample from previous mip }; #pragma pack (push, 1) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 93b006f6..e624e5bb 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -99,7 +99,6 @@ static std::unique_ptr agsDevice; extern bool userHaveAMDGPU; bool SupportTextureAtlases = false; -bool SupportStreamingResources = false; namespace { @@ -606,16 +605,6 @@ XRESULT D3D11GraphicsEngine::Init() { SupportTextureAtlases = true; Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows = SupportTextureAtlases; } - - // Check for tiled resource (streaming) support - SupportStreamingResources = D3D11StreamingResourcesManager::GetIsStreamingSupported( Device.Get() ); - Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.StreamingResourcesSupported = SupportStreamingResources; - if ( SupportStreamingResources ) { - LogInfo() << "Tiled Resources supported — streaming resource manager available"; - // Allow atlas path even on < 4GB cards when streaming is available - SupportTextureAtlases = true; - Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows = true; - } } LogInfo() << "Creating ShaderManager"; @@ -982,6 +971,9 @@ XRESULT D3D11GraphicsEngine::RecreateBuffers() { GetDevice().Get(), roundedTextureResolution.x, roundedTextureResolution.y, DXGI_FORMAT_R32_TYPELESS, nullptr, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT ); + // Create / recreate Hi-Z pyramid resources to match new depth buffer size + 
CreateHiZResources(); + // Create PFX-Renderer if ( !PfxRenderer ) PfxRenderer = std::make_unique(); @@ -6249,20 +6241,18 @@ XRESULT D3D11GraphicsEngine::DrawVOBsIndirect( const Frustum& frustum, bool bind auto _ = RecordGraphicsEvent( L"DrawVOBsIndirect" ); - // --- 0. Update streaming tile mappings (only on main pass, not shadow) --- - if ( m_StreamingResources && bindPS ) { - // Read back feedback from 2 frames ago to determine which sources need loading - ReadBackFeedback(); + auto& context = GetContext(); - XMFLOAT3 camPos = Engine::GAPI->GetCameraPosition(); - float drawDist = Engine::GAPI->GetRendererState().RendererSettings.OutdoorVobDrawRadius; - float currentTime = Engine::GAPI->GetTimeSeconds(); - const std::unordered_set* requested = m_RequestedSources.empty() ? nullptr : &m_RequestedSources; - m_StreamingResources->UpdateStreaming( camPos, drawDist, currentTime, requested ); + // --- 0b. Build Hi-Z pyramid from current depth buffer (main pass only) --- + // The world mesh has already been rendered, so the depth buffer contains valid + // occluder geometry. Copy depth first to avoid DSV/SRV resource hazard, then + // build the hierarchical min-depth mip chain for GPU culling. + const bool useHiZ = bindPS && m_HiZTexture && m_HiZSRV; + if ( useHiZ ) { + CopyDepthStencil(); + BuildHiZPyramid(); } - auto& context = GetContext(); - // --- 1. Reset indirect args InstanceCounts via CopyResource from template --- context->CopyResource( m_MergedIndirectArgs->GetIndirectBuffer().Get(), m_IndirectArgsTemplate.Get() ); @@ -6277,15 +6267,25 @@ XRESULT D3D11GraphicsEngine::DrawVOBsIndirect( const Frustum& frustum, bool bind cb.windAdvanced = (Engine::GAPI->GetRendererState().RendererSettings.WindQuality == GothicRendererSettings::EWindQuality::WIND_QUALITY_ADVANCED) ? 1 : 0; cb.numVobs = static_cast(m_StaticVobs.size()); + cb.feedbackFrameNumber = 0; - // Feedback: on main pass, increment frame number and tell CS to stamp visible sources. 
- // On shadow pass, feedbackFrameNumber = 0 disables feedback writes in the CS. - const bool useFeedback = bindPS && m_FeedbackTexture && m_FeedbackUAV && m_StreamingResources; - if ( useFeedback ) { - m_FeedbackFrameNumber++; - cb.feedbackFrameNumber = m_FeedbackFrameNumber; + // Hi-Z occlusion culling: populate view-projection and Hi-Z dimensions + if ( useHiZ ) { + cb.enableHiZ = 1; + cb.hiZMipCount = m_HiZMipCount; + cb.hiZWidth = static_cast( DepthStencilBuffer->GetSizeX() ); + cb.hiZHeight = static_cast( DepthStencilBuffer->GetSizeY() ); + + XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); + auto& projF = Engine::GAPI->GetProjectionMatrix(); + XMStoreFloat4x4( &cb.viewProjection, + XMMatrixMultiply( view, XMLoadFloat4x4( &projF ) ) ); } else { - cb.feedbackFrameNumber = 0; + cb.enableHiZ = 0; + cb.hiZMipCount = 0; + cb.hiZWidth = 0.0f; + cb.hiZHeight = 0.0f; + XMStoreFloat4x4( &cb.viewProjection, XMMatrixIdentity() ); } m_CullConstantBuffer->UpdateBuffer( &cb ); @@ -6304,6 +6304,12 @@ XRESULT D3D11GraphicsEngine::DrawVOBsIndirect( const Frustum& frustum, bool bind }; context->CSSetShaderResources( 0, 2, srvs ); + // SRV t2 = Hi-Z pyramid texture (for occlusion culling) + if ( useHiZ ) { + ID3D11ShaderResourceView* hiZSRV = m_HiZSRV.Get(); + context->CSSetShaderResources( 2, 1, &hiZSRV ); + } + // UAV u0 = InstanceOutput (structured), u1 = IndirectArgs (raw byte address) ID3D11UnorderedAccessView* uavs[2] = { m_InstanceBufferGPU->GetUAV(), @@ -6311,32 +6317,14 @@ XRESULT D3D11GraphicsEngine::DrawVOBsIndirect( const Frustum& frustum, bool bind }; context->CSSetUnorderedAccessViews( 0, 2, uavs, nullptr ); - // UAV u5 = Feedback texture (only on main pass) - if ( useFeedback ) { - ID3D11UnorderedAccessView* feedbackUAV = m_FeedbackUAV.Get(); - context->CSSetUnorderedAccessViews( 5, 1, &feedbackUAV, nullptr ); - } - UINT numGroups = (static_cast(m_StaticVobs.size()) + 63) / 64; context->Dispatch( numGroups, 1, 1 ); - // Copy feedback to staging ring for async 
readback (before unbinding UAVs) - if ( useFeedback ) { - context->CopyResource( - m_FeedbackStaging[m_FeedbackStagingHead % 3].Get(), - m_FeedbackTexture.Get() ); - m_FeedbackStagingHead++; - } - // Unbind CS resources - ID3D11ShaderResourceView* nullSRV[2] = { nullptr, nullptr }; + ID3D11ShaderResourceView* nullSRV[3] = { nullptr, nullptr, nullptr }; ID3D11UnorderedAccessView* nullUAV[2] = { nullptr, nullptr }; - context->CSSetShaderResources( 0, 2, nullSRV ); + context->CSSetShaderResources( 0, 3, nullSRV ); context->CSSetUnorderedAccessViews( 0, 2, nullUAV, nullptr ); - if ( useFeedback ) { - ID3D11UnorderedAccessView* nullFeedbackUAV = nullptr; - context->CSSetUnorderedAccessViews( 5, 1, &nullFeedbackUAV, nullptr ); - } context->CSSetShader( nullptr, nullptr, 0 ); // --- 4. Bind global geometry (once) --- @@ -8275,17 +8263,6 @@ void D3D11GraphicsEngine::BuildGPUCullingBuffers() { }; std::unordered_map> sourceIndexLookup; std::unordered_map formatGlobalOffsets; // cached offsets for global index computation - if ( m_StreamingResources ) { - for ( auto& group : m_AtlasDrawGroups ) { - const auto& srcs = m_StreamingResources->GetSourceTextures( group.format ); - auto& lookup = sourceIndexLookup[group.format]; - UINT globalOffset = m_StreamingResources->GetGlobalSourceOffset( group.format ); - formatGlobalOffsets[group.format] = globalOffset; - for ( UINT i = 0; i < static_cast( srcs.size() ); i++ ) { - lookup[{ srcs[i].slice, srcs[i].x, srcs[i].y }] = i; - } - } - } UINT runningInstanceOffset = 0; UINT globalArgIndex = 0; @@ -8320,23 +8297,6 @@ void D3D11GraphicsEngine::BuildGPUCullingBuffers() { // Populate globalSourceIndex for GPU feedback by reverse-looking up pixel coords smGPU.globalSourceIndex = 0; - if ( m_StreamingResources ) { - auto fmtIt = sourceIndexLookup.find( group.format ); - if ( fmtIt != sourceIndexLookup.end() ) { - // Convert normalized UVs back to pixel coords (atlasSize from texture desc) - D3D11_TEXTURE2D_DESC atlasTexDesc; - 
m_TextureAtlasses[group.format].atlasTextureArray->GetDesc( &atlasTexDesc ); - UINT px = static_cast( submesh.atlasDesc.uStart * atlasTexDesc.Width + 0.5f ); - UINT py = static_cast( submesh.atlasDesc.vStart * atlasTexDesc.Height + 0.5f ); - UINT sl = static_cast( submesh.atlasDesc.slice ); - auto srcIt = fmtIt->second.find( { sl, px, py } ); - if ( srcIt != fmtIt->second.end() ) { - // Store the global source index (format offset + local index) - UINT globalOffset = formatGlobalOffsets.count( group.format ) ? formatGlobalOffsets[group.format] : 0; - smGPU.globalSourceIndex = globalOffset + srcIt->second; - } - } - } visualSubmeshMap[visual].push_back( smGPU ); @@ -8468,117 +8428,190 @@ void D3D11GraphicsEngine::BuildGPUCullingBuffers() { << m_TotalMaxInstances << " max instances"; } -void D3D11GraphicsEngine::CreateFeedbackBuffers() { - // Only create feedback infrastructure when streaming is active - if ( !m_StreamingResources ) +void D3D11GraphicsEngine::CreateHiZResources() { + auto* device = GetDevice().Get(); + HRESULT hr; + + // Get depth buffer dimensions + UINT width = DepthStencilBuffer->GetSizeX(); + UINT height = DepthStencilBuffer->GetSizeY(); + if ( width == 0 || height == 0 ) return; - UINT totalSources = m_StreamingResources->GetTotalSourceCount(); - if ( totalSources == 0 ) + // Calculate mip count for full mip chain + UINT mipCount = 1; + { + UINT w = width, h = height; + while ( w > 1 || h > 1 ) { + w = (std::max)( w / 2, 1u ); + h = (std::max)( h / 2, 1u ); + mipCount++; + } + } + m_HiZMipCount = mipCount; + + // Create Hi-Z texture: full mip chain, SRV-bindable (used as CS input via SRV) + D3D11_TEXTURE2D_DESC hiZDesc = {}; + hiZDesc.Width = width; + hiZDesc.Height = height; + hiZDesc.MipLevels = mipCount; + hiZDesc.ArraySize = 1; + hiZDesc.Format = DXGI_FORMAT_R32_FLOAT; + hiZDesc.SampleDesc.Count = 1; + hiZDesc.Usage = D3D11_USAGE_DEFAULT; + hiZDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + hr = device->CreateTexture2D( &hiZDesc, nullptr, 
m_HiZTexture.ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Hi-Z] Failed to create Hi-Z texture"; return; + } - // Create the feedback texture: RWTexture2D of size (totalSources, 1) - D3D11_TEXTURE2D_DESC texDesc = {}; - texDesc.Width = totalSources; - texDesc.Height = 1; - texDesc.MipLevels = 1; - texDesc.ArraySize = 1; - texDesc.Format = DXGI_FORMAT_R32_UINT; - texDesc.SampleDesc.Count = 1; - texDesc.Usage = D3D11_USAGE_DEFAULT; - texDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS; + // SRV for the full Hi-Z texture (all mips, used for occlusion testing in CS_CullVobs) + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_R32_FLOAT; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = mipCount; + srvDesc.Texture2D.MostDetailedMip = 0; - HRESULT hr = GetDevice()->CreateTexture2D( &texDesc, nullptr, m_FeedbackTexture.ReleaseAndGetAddressOf() ); + hr = device->CreateShaderResourceView( m_HiZTexture.Get(), &srvDesc, m_HiZSRV.ReleaseAndGetAddressOf() ); if ( FAILED( hr ) ) { - LogError() << "[Feedback] Failed to create feedback texture (" << totalSources << " sources)"; + LogError() << "[Hi-Z] Failed to create Hi-Z SRV"; + m_HiZTexture.Reset(); return; } - // Create UAV for the feedback texture + // Create scratch texture: single mip, UAV-bindable (CS writes here, then we copy to Hi-Z) + D3D11_TEXTURE2D_DESC scratchDesc = {}; + scratchDesc.Width = width; + scratchDesc.Height = height; + scratchDesc.MipLevels = 1; + scratchDesc.ArraySize = 1; + scratchDesc.Format = DXGI_FORMAT_R32_FLOAT; + scratchDesc.SampleDesc.Count = 1; + scratchDesc.Usage = D3D11_USAGE_DEFAULT; + scratchDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; + + hr = device->CreateTexture2D( &scratchDesc, nullptr, m_HiZScratch.ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Hi-Z] Failed to create scratch texture"; + m_HiZTexture.Reset(); + m_HiZSRV.Reset(); + return; + } + + // 
Scratch UAV D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; - uavDesc.Format = DXGI_FORMAT_R32_UINT; + uavDesc.Format = DXGI_FORMAT_R32_FLOAT; uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; uavDesc.Texture2D.MipSlice = 0; - hr = GetDevice()->CreateUnorderedAccessView( m_FeedbackTexture.Get(), &uavDesc, m_FeedbackUAV.ReleaseAndGetAddressOf() ); + hr = device->CreateUnorderedAccessView( m_HiZScratch.Get(), &uavDesc, m_HiZScratchUAV.ReleaseAndGetAddressOf() ); if ( FAILED( hr ) ) { - LogError() << "[Feedback] Failed to create feedback UAV"; - m_FeedbackTexture.Reset(); + LogError() << "[Hi-Z] Failed to create scratch UAV"; + m_HiZTexture.Reset(); + m_HiZSRV.Reset(); + m_HiZScratch.Reset(); return; } - // Clear feedback texture to zero so the first readback doesn't see garbage - const UINT clearValue[4] = { 0, 0, 0, 0 }; - GetContext()->ClearUnorderedAccessViewUint( m_FeedbackUAV.Get(), clearValue ); - - // Create 3 staging textures for async readback (2-frame latency ring) - for ( int i = 0; i < 3; i++ ) { - D3D11_TEXTURE2D_DESC stagingDesc = {}; - stagingDesc.Width = totalSources; - stagingDesc.Height = 1; - stagingDesc.MipLevels = 1; - stagingDesc.ArraySize = 1; - stagingDesc.Format = DXGI_FORMAT_R32_UINT; - stagingDesc.SampleDesc.Count = 1; - stagingDesc.Usage = D3D11_USAGE_STAGING; - stagingDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + // Scratch SRV (not strictly needed, but useful for debugging) + D3D11_SHADER_RESOURCE_VIEW_DESC scratchSRVDesc = {}; + scratchSRVDesc.Format = DXGI_FORMAT_R32_FLOAT; + scratchSRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + scratchSRVDesc.Texture2D.MipLevels = 1; + scratchSRVDesc.Texture2D.MostDetailedMip = 0; - hr = GetDevice()->CreateTexture2D( &stagingDesc, nullptr, m_FeedbackStaging[i].ReleaseAndGetAddressOf() ); - if ( FAILED( hr ) ) { - LogError() << "[Feedback] Failed to create staging texture " << i; - m_FeedbackTexture.Reset(); - m_FeedbackUAV.Reset(); - return; - } + hr = device->CreateShaderResourceView( 
m_HiZScratch.Get(), &scratchSRVDesc, m_HiZScratchSRV.ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Hi-Z] Failed to create scratch SRV"; + m_HiZTexture.Reset(); + m_HiZSRV.Reset(); + m_HiZScratch.Reset(); + m_HiZScratchUAV.Reset(); + return; } - m_FeedbackStagingHead = 0; - m_FeedbackFrameNumber = 0; - m_RequestedSources.clear(); - - LogInfo() << "[Feedback] Created feedback buffers: " << totalSources << " sources"; + LogInfo() << "[Hi-Z] Created Hi-Z pyramid resources: " << width << "x" << height + << ", " << mipCount << " mip levels"; } -void D3D11GraphicsEngine::ReadBackFeedback() { - if ( !m_FeedbackTexture || !m_StreamingResources ) +void D3D11GraphicsEngine::BuildHiZPyramid() { + if ( !m_HiZTexture || !m_HiZScratch || m_HiZMipCount == 0 ) return; - UINT totalSources = m_StreamingResources->GetTotalSourceCount(); - if ( totalSources == 0 ) + auto hiZCS = ShaderManager->GetCShader( CShaderID::CS_BuildHiZ ); + if ( !hiZCS ) return; - // We read back the staging buffer from 2 frames ago (to avoid GPU stalls) - // Only attempt readback once we have at least 3 frames of data - if ( m_FeedbackFrameNumber < 3 ) - return; - - UINT readIndex = ( m_FeedbackStagingHead + 1 ) % 3; // 2 frames behind current - - D3D11_MAPPED_SUBRESOURCE mapped = {}; - HRESULT hr = GetContext()->Map( m_FeedbackStaging[readIndex].Get(), 0, D3D11_MAP_READ, D3D11_MAP_FLAG_DO_NOT_WAIT, &mapped ); - if ( FAILED( hr ) ) { - // GPU hasn't finished yet — skip this frame's readback - return; - } - - m_RequestedSources.clear(); - const UINT* data = static_cast( mapped.pData ); - - // A source is "requested" if its feedback value is a recent frame number - // (within the last few frames). This provides hysteresis against single-frame flickers. - UINT threshold = ( m_FeedbackFrameNumber > 4 ) ? 
m_FeedbackFrameNumber - 4 : 0; + auto& context = GetContext(); - for ( UINT i = 0; i < totalSources; i++ ) { - const auto value = data[i]; - if ( value >= threshold ) { - m_RequestedSources.insert( i ); - } + UINT width = DepthStencilBuffer->GetSizeX(); + UINT height = DepthStencilBuffer->GetSizeY(); + + hiZCS->Apply(); + + for ( UINT mip = 0; mip < m_HiZMipCount; mip++ ) { + UINT mipWidth = (std::max)( width >> mip, 1u ); + UINT mipHeight = (std::max)( height >> mip, 1u ); + + // Update constant buffer + HiZBuildConstants cb = {}; + cb.outputWidth = mipWidth; + cb.outputHeight = mipHeight; + cb.inputMipLevel = ( mip > 0 ) ? ( mip - 1 ) : 0; + cb.isCopyPass = ( mip == 0 ) ? 1 : 0; + hiZCS->GetConstantBuffer()[0]->UpdateBuffer( &cb ); + hiZCS->GetConstantBuffer()[0]->BindToComputeShader( 0 ); + + // Bind input SRV: + // Mip 0: read from depth buffer copy (avoids DSV/SRV hazard) + // Mip N: read from Hi-Z texture SRV (previous mip levels already filled) + ID3D11ShaderResourceView* inputSRV = nullptr; + if ( mip == 0 ) { + inputSRV = DepthStencilBufferCopy->GetShaderResView().Get(); + } else { + inputSRV = m_HiZSRV.Get(); + } + context->CSSetShaderResources( 0, 1, &inputSRV ); + + // Bind output UAV: always the scratch texture + ID3D11UnorderedAccessView* uav = m_HiZScratchUAV.Get(); + context->CSSetUnorderedAccessViews( 0, 1, &uav, nullptr ); + + // Dispatch + UINT groupsX = ( mipWidth + 7 ) / 8; + UINT groupsY = ( mipHeight + 7 ) / 8; + context->Dispatch( groupsX, groupsY, 1 ); + + // Unbind SRV and UAV to allow the copy + ID3D11ShaderResourceView* nullSRV = nullptr; + ID3D11UnorderedAccessView* nullUAV = nullptr; + context->CSSetShaderResources( 0, 1, &nullSRV ); + context->CSSetUnorderedAccessViews( 0, 1, &nullUAV, nullptr ); + + // Copy scratch (mip 0) -> Hi-Z texture (mip N) + D3D11_BOX srcBox = {}; + srcBox.left = 0; + srcBox.top = 0; + srcBox.right = mipWidth; + srcBox.bottom = mipHeight; + srcBox.front = 0; + srcBox.back = 1; + + 
context->CopySubresourceRegion( + m_HiZTexture.Get(), + D3D11CalcSubresource( mip, 0, m_HiZMipCount ), + 0, 0, 0, + m_HiZScratch.Get(), + 0, + &srcBox ); } - GetContext()->Unmap( m_FeedbackStaging[readIndex].Get(), 0 ); + // Clean up CS state + context->CSSetShader( nullptr, nullptr, 0 ); } - void D3D11GraphicsEngine::BuildSceneTextureAtlasses() { for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ ) { m_TextureAtlasses[(DXGI_FORMAT)i].Destroy(); @@ -8586,20 +8619,6 @@ void D3D11GraphicsEngine::BuildSceneTextureAtlasses() { m_TextureAtlasLookup.clear(); m_AtlasDrawGroups.clear(); - // Clean up any previous streaming state - if ( m_StreamingResources ) { - m_StreamingResources->OnWorldUnloaded(); - } - - // Clean up feedback buffers - m_FeedbackTexture.Reset(); - m_FeedbackUAV.Reset(); - for ( int i = 0; i < 3; i++ ) - m_FeedbackStaging[i].Reset(); - m_FeedbackStagingHead = 0; - m_FeedbackFrameNumber = 0; - m_RequestedSources.clear(); - if ( !SupportTextureAtlases ) { return; } @@ -8645,23 +8664,6 @@ void D3D11GraphicsEngine::BuildSceneTextureAtlasses() { return a.Format < b.Format; } ); - // Determine if we should use the streaming (tiled resources) path - Engine::GAPI->GetRendererState().RendererSettings.EnableStreamingResources = SupportStreamingResources; - bool useStreaming = SupportStreamingResources - && Engine::GAPI->GetRendererState().RendererSettings.EnableStreamingResources; - - if ( useStreaming ) { - // Initialize streaming manager if not yet created - if ( !m_StreamingResources ) { - m_StreamingResources = std::make_unique(); - if ( !m_StreamingResources->Init( GetDevice().Get(), GetContext().Get() ) ) { - LogWarn() << "Streaming resources init failed, falling back to monolithic atlases"; - m_StreamingResources.reset(); - useStreaming = false; - } - } - } - // Create atlases per format group (process ALL groups including last) size_t rangeStart = 0; while ( rangeStart < uniqueTextures.size() ) { @@ -8677,14 +8679,7 @@ void 
D3D11GraphicsEngine::BuildSceneTextureAtlasses() { std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); - TextureManager::AtlasResult atlas; - if ( useStreaming && m_StreamingResources ) { - // Streaming path: tiled Texture2DArray backed by tile pool - atlas = m_StreamingResources->CreateStreamingAtlasArray( txView, 2048, 6 ); - } else { - // Monolithic path: fully committed Texture2DArray (original behavior) - atlas = TextureManager::CreateAtlasArray( GetDevice().Get(), GetContext().Get(), txView, 2048, 6 ); - } + TextureManager::AtlasResult atlas = TextureManager::CreateAtlasArray( GetDevice().Get(), GetContext().Get(), txView, 2048, 6 ); // Map descriptors back to Gothic texture pointers for ( size_t i = 0; i < texPtrs.size(); i++ ) { @@ -8698,8 +8693,7 @@ void D3D11GraphicsEngine::BuildSceneTextureAtlasses() { rangeStart = rangeEnd; } - LogInfo() << "Atlas: " << uniqueTextures.size() << " unique textures, " << m_TextureAtlasLookup.size() << " mapped" - << ( useStreaming ? " (streaming)" : " (monolithic)" ); + LogInfo() << "Atlas: " << uniqueTextures.size() << " unique textures, " << m_TextureAtlasLookup.size() << " mapped"; // Build global VB/IB and indirect args from atlas data BuildStaticGeometryBuffers(); @@ -8707,9 +8701,6 @@ void D3D11GraphicsEngine::BuildSceneTextureAtlasses() { // Build GPU structured buffers for compute shader culling // currently only used with static vobs when we do atlases. 
BuildGPUCullingBuffers(); - - // Create GPU feedback buffers for streaming (after atlas and culling buffers are ready) - CreateFeedbackBuffers(); } void D3D11GraphicsEngine::CacheWorldStaticVobs() { diff --git a/D3D11Engine/D3D11GraphicsEngine.h b/D3D11Engine/D3D11GraphicsEngine.h index 9347f8ec..bb44d63e 100644 --- a/D3D11Engine/D3D11GraphicsEngine.h +++ b/D3D11Engine/D3D11GraphicsEngine.h @@ -536,19 +536,16 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { std::vector m_MergedArgsReset; // CPU-side template for reset UINT m_TotalMaxInstances = 0; - /** Streaming resources manager (tiled resources) — opt-in, coexists with monolithic atlas */ - std::unique_ptr m_StreamingResources; - - /** GPU feedback for streaming: source-indexed RWTexture2D tracking which textures need loading */ - Microsoft::WRL::ComPtr m_FeedbackTexture; - Microsoft::WRL::ComPtr m_FeedbackUAV; - Microsoft::WRL::ComPtr m_FeedbackStaging[3]; // ring buffer for async readback - UINT m_FeedbackStagingHead = 0; - UINT m_FeedbackFrameNumber = 0; - std::unordered_set m_RequestedSources; // result of last readback - - /** Read back the feedback texture from 2 frames ago and populate m_RequestedSources */ - void ReadBackFeedback(); - /** Create feedback buffer infrastructure (called after atlas creation) */ - void CreateFeedbackBuffers(); + /** Hi-Z occlusion culling resources */ + Microsoft::WRL::ComPtr m_HiZTexture; // Full mip-chain, SRV-only + Microsoft::WRL::ComPtr m_HiZSRV; + Microsoft::WRL::ComPtr m_HiZScratch; // Single-mip scratch for CS UAV writes + Microsoft::WRL::ComPtr m_HiZScratchUAV; + Microsoft::WRL::ComPtr m_HiZScratchSRV; + UINT m_HiZMipCount = 0; + + /** Create Hi-Z pyramid resources (called after depth buffer creation) */ + void CreateHiZResources(); + /** Build the Hi-Z mip chain from the current depth buffer */ + void BuildHiZPyramid(); }; diff --git a/D3D11Engine/D3D11ShaderManager.cpp b/D3D11Engine/D3D11ShaderManager.cpp index f15a36fc..4a9bb997 100644 --- 
a/D3D11Engine/D3D11ShaderManager.cpp +++ b/D3D11Engine/D3D11ShaderManager.cpp @@ -569,8 +569,10 @@ XRESULT D3D11ShaderManager::Init() { Shaders.push_back( ShaderInfo::make( "CS_TiledShading.hlsl" )); + Shaders.push_back( ShaderInfo::make( "CS_CullVobs.hlsl" )); - Shaders.push_back( ShaderInfo::make( "CS_CullVobs.hlsl" ));} + Shaders.push_back( ShaderInfo::make( "CS_BuildHiZ.hlsl" ) ); + } return XR_SUCCESS; } diff --git a/D3D11Engine/GothicAPI.cpp b/D3D11Engine/GothicAPI.cpp index 82d192ec..bf6e9769 100644 --- a/D3D11Engine/GothicAPI.cpp +++ b/D3D11Engine/GothicAPI.cpp @@ -4132,7 +4132,7 @@ std::vector::iterator GothicAPI::MoveVobFromBspToDynamic( VobInfo* vob static void CVVH_AddNotDrawnVobToList( std::vector& source, - float dist, + float distSq, const RndCullContext& ctx, DirectX::ContainmentType bspContainment, BspTreeVobVisitor* visitor @@ -4140,7 +4140,6 @@ static void CVVH_AddNotDrawnVobToList( const auto camPos = XMLoadFloat3( &ctx.cameraPosition ); auto cullingEnabled = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.Culling.CullVobs && ctx.frustum.SupportsCulling(); - auto distSq = dist * dist; for ( auto const& it : source ) { if ( it->VisibleInRenderPass ) continue; @@ -4167,14 +4166,14 @@ static void CVVH_AddNotDrawnVobToList( static void CVVH_AddNotDrawnVobToList( std::vector& source, - float dist, const RndCullContext& ctx, + float distSq, const RndCullContext& ctx, DirectX::ContainmentType bspContainment, BspTreeVobVisitor* visitor) { const auto camPos = XMLoadFloat3( &ctx.cameraPosition ); auto cullingEnabled = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.Culling.CullVobs && ctx.frustum.SupportsCulling(); - auto vDistSq = XMVectorReplicate( dist * dist ); + auto vDistSq = XMVectorReplicate( distSq ); for ( auto const& it : source ) { if ( it->VisibleInRenderPass ) continue; @@ -5617,10 +5616,13 @@ static void CollectVisibleVobsHelper( BspInfo* base, float yMaxWorld, EBspTreeCollectFlags collectFlags = 
EBspTreeCollectFlags::COLLECT_ALL_NO_MUTATE ) { - const float vobIndoorDist = ctx.drawDistances.IndoorVobs; - const float vobOutdoorDist = ctx.drawDistances.OutdoorVobs; - const float vobOutdoorSmallDist = ctx.drawDistances.OutdoorVobsSmall; + const float vobIndoorDistSq = ctx.drawDistances.IndoorVobs * ctx.drawDistances.IndoorVobs; + const float vobOutdoorDistSq = ctx.drawDistances.OutdoorVobs * ctx.drawDistances.OutdoorVobs; + const float vobOutdoorSmallDistSq = ctx.drawDistances.OutdoorVobsSmall * ctx.drawDistances.OutdoorVobsSmall; + const float visualFXDrawRadius = ctx.drawDistances.VisualFX; + const float visualFXDrawRadiusSq = ctx.drawDistances.VisualFX * ctx.drawDistances.VisualFX; + const XMFLOAT3 camPos = ctx.cameraPosition; const FXMVECTOR cameraPosition = XMLoadFloat3( &camPos ); int clipFlags = EGothicCullFlags::CullSidesNear; @@ -5643,11 +5645,11 @@ static void CollectVisibleVobsHelper( BspInfo* base, nodeYMax = std::max( nodeYMax, base->OriginalNode->BBox3D.Max.y ); nodeBox.Max.y = nodeYMax; - const float dist = checkDist - ? Toolbox::ComputePointAABBDistance( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ) + const float distSq = checkDist + ? Toolbox::ComputePointAABBDistanceSq( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ) : 0; ContainmentType clipResult = inheritedContainment; - if ( dist < vobOutdoorDist ) { + if ( distSq < vobOutdoorDistSq ) { if ( !RendererState.RendererSettings.EnableOcclusionCulling ) { if ( clipResult != ContainmentType::CONTAINS ) { clipResult = ctx.frustum.Contains( Frustum::BBoxFromzTBBox3D( nodeBox ) ); @@ -5686,34 +5688,32 @@ static void CollectVisibleVobsHelper( BspInfo* base, std::vector& listC = base->Vobs; std::vector& listD = base->Mobs; - const float dist = checkDist - ? Toolbox::ComputePointAABBDistance( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ) + const float distSq = checkDist + ? 
Toolbox::ComputePointAABBDistanceSq( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ) : 0; if ( collectFlags & COLLECT_VOBS && RendererState.RendererSettings.DrawVOBs ) { - if ( collectFlags & COLLECT_INDOOR_VOBS && dist < vobIndoorDist ) { - CVVH_AddNotDrawnVobToList( listA, vobIndoorDist, ctx, clipResult, visitor ); + if ( collectFlags & COLLECT_INDOOR_VOBS && distSq < vobIndoorDistSq ) { + CVVH_AddNotDrawnVobToList( listA, vobIndoorDistSq, ctx, clipResult, visitor ); } - if ( dist < vobOutdoorSmallDist ) { - CVVH_AddNotDrawnVobToList( listB, vobOutdoorSmallDist, ctx, clipResult, visitor ); + if ( distSq < vobOutdoorSmallDistSq ) { + CVVH_AddNotDrawnVobToList( listB, vobOutdoorSmallDistSq, ctx, clipResult, visitor ); } - if ( dist < vobOutdoorDist ) { - CVVH_AddNotDrawnVobToList( listC, vobOutdoorDist, ctx, clipResult, visitor ); + if ( distSq < vobOutdoorDistSq ) { + CVVH_AddNotDrawnVobToList( listC, vobOutdoorDistSq, ctx, clipResult, visitor ); } } if ( collectFlags & COLLECT_MOBS - && RendererState.RendererSettings.DrawMobs && dist < vobOutdoorSmallDist ) { - CVVH_AddNotDrawnVobToList( listD, vobOutdoorDist, ctx, clipResult, visitor); + && RendererState.RendererSettings.DrawMobs && distSq < vobOutdoorSmallDistSq ) { + CVVH_AddNotDrawnVobToList( listD, vobOutdoorDistSq, ctx, clipResult, visitor); } if ( collectFlags & COLLECT_LIGHTS - && RendererState.RendererSettings.EnableDynamicLighting && dist < visualFXDrawRadius ) { - - bool markSeen = (collectFlags & COLLECT_MUTATE) != 0; + && RendererState.RendererSettings.EnableDynamicLighting && distSq < visualFXDrawRadiusSq ) { // Add dynamic lights for ( int i = 0; i < leaf->LightVobList.NumInArray; i++ ) { zCVobLight* vob = leaf->LightVobList.Array[i]; @@ -5815,7 +5815,7 @@ struct BspTraversalNode { DirectX::ContainmentType inheritedContainment; }; -static void CollectVisibleVobsHelper2( BspInfo* base, +static void CollectVisibleVobsHelperNonRecursive( BspInfo* base, zTBBox3D boxCell, const 
RndCullContext& ctx, BspTreeVobVisitor* visitor, @@ -5823,10 +5823,12 @@ static void CollectVisibleVobsHelper2( BspInfo* base, float yMaxWorld, EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_NO_MUTATE ) { - const float vobIndoorDist = ctx.drawDistances.IndoorVobs; - const float vobOutdoorDist = ctx.drawDistances.OutdoorVobs; - const float vobOutdoorSmallDist = ctx.drawDistances.OutdoorVobsSmall; + const float vobIndoorDistSq = ctx.drawDistances.IndoorVobs * ctx.drawDistances.IndoorVobs; + const float vobOutdoorDistSq = ctx.drawDistances.OutdoorVobs * ctx.drawDistances.OutdoorVobs; + const float vobOutdoorSmallDistSq = ctx.drawDistances.OutdoorVobsSmall * ctx.drawDistances.OutdoorVobsSmall; + const float visualFXDrawRadius = ctx.drawDistances.VisualFX; + const float visualFXDrawRadiusSq = ctx.drawDistances.VisualFX * ctx.drawDistances.VisualFX; const XMFLOAT3 camPos = ctx.cameraPosition; const bool checkDist = (collectFlags & COLLECT_DISABLE_CHECK_DIST) == 0; @@ -5860,11 +5862,11 @@ static void CollectVisibleVobsHelper2( BspInfo* base, nodeYMax = std::max( nodeYMax, currBase->OriginalNode->BBox3D.Max.y ); nodeBox.Max.y = nodeYMax; - const float dist = checkDist - ? Toolbox::ComputePointAABBDistance( camPos, currBase->OriginalNode->BBox3D.Min, currBase->OriginalNode->BBox3D.Max ) + const float distSq = checkDist + ? 
Toolbox::ComputePointAABBDistanceSq( camPos, currBase->OriginalNode->BBox3D.Min, currBase->OriginalNode->BBox3D.Max ) : 0; - if ( dist < vobOutdoorDist ) { + if ( distSq < vobOutdoorDistSq ) { if ( !RendererState.RendererSettings.EnableOcclusionCulling ) { if ( clipResult != ContainmentType::CONTAINS ) { clipResult = ctx.frustum.Contains( Frustum::BBoxFromzTBBox3D( nodeBox ) ); @@ -5888,22 +5890,22 @@ static void CollectVisibleVobsHelper2( BspInfo* base, zCBspLeaf* leaf = static_cast(currBase->OriginalNode); if ( collectFlags & COLLECT_VOBS && RendererState.RendererSettings.DrawVOBs ) { - if ( collectFlags & COLLECT_INDOOR_VOBS && dist < vobIndoorDist ) { - CVVH_AddNotDrawnVobToList( currBase->IndoorVobs, vobIndoorDist, ctx, clipResult, visitor ); + if ( collectFlags & COLLECT_INDOOR_VOBS && distSq < vobIndoorDistSq ) { + CVVH_AddNotDrawnVobToList( currBase->IndoorVobs, vobIndoorDistSq, ctx, clipResult, visitor ); } - if ( dist < vobOutdoorSmallDist ) { - CVVH_AddNotDrawnVobToList( currBase->SmallVobs, vobOutdoorSmallDist, ctx, clipResult, visitor ); + if ( distSq < vobOutdoorSmallDistSq ) { + CVVH_AddNotDrawnVobToList( currBase->SmallVobs, vobOutdoorSmallDistSq, ctx, clipResult, visitor ); } - if ( dist < vobOutdoorDist ) { - CVVH_AddNotDrawnVobToList( currBase->Vobs, vobOutdoorDist, ctx, clipResult, visitor ); + if ( distSq < vobOutdoorDistSq ) { + CVVH_AddNotDrawnVobToList( currBase->Vobs, vobOutdoorDistSq, ctx, clipResult, visitor ); } } - if ( collectFlags & COLLECT_MOBS && RendererState.RendererSettings.DrawMobs && dist < vobOutdoorSmallDist ) { - CVVH_AddNotDrawnVobToList( currBase->Mobs, vobOutdoorDist, ctx, clipResult, visitor ); + if ( collectFlags & COLLECT_MOBS && RendererState.RendererSettings.DrawMobs && distSq < vobOutdoorSmallDistSq ) { + CVVH_AddNotDrawnVobToList( currBase->Mobs, vobOutdoorDistSq, ctx, clipResult, visitor ); } - if ( collectFlags & COLLECT_LIGHTS && RendererState.RendererSettings.EnableDynamicLighting && dist < visualFXDrawRadius ) 
{ + if ( collectFlags & COLLECT_LIGHTS && RendererState.RendererSettings.EnableDynamicLighting && distSq < visualFXDrawRadiusSq ) { for ( int i = 0; i < leaf->LightVobList.NumInArray; i++ ) { zCVobLight* vob = leaf->LightVobList.Array[i]; @@ -6005,7 +6007,7 @@ void GothicAPI::CollectVisibleVobs( const RndCullContext& ctx, EBspTreeCollectFl static thread_local BspTreeVobVisitor bspVobVisitor{}; // Recursively go through the tree and draw all nodes - CollectVisibleVobsHelper2( root, root->OriginalNode->BBox3D, + CollectVisibleVobsHelperNonRecursive( root, root->OriginalNode->BBox3D, ctx, &bspVobVisitor, ContainmentType::INTERSECTS, diff --git a/D3D11Engine/GothicGraphicsState.h b/D3D11Engine/GothicGraphicsState.h index 116cc688..07234fe0 100644 --- a/D3D11Engine/GothicGraphicsState.h +++ b/D3D11Engine/GothicGraphicsState.h @@ -641,7 +641,6 @@ struct GothicRendererSettings { WireframeWorld = false; DrawShadowGeometry = true; UseIndirectVobShadows = false; - EnableStreamingResources = false; FixViewFrustum = false; DisableWatermark = true; DisableRendering = false; @@ -831,7 +830,6 @@ struct GothicRendererSettings { E_ShadowFrustumCulling ShadowFrustumCullingMode; bool DrawShadowGeometry; bool UseIndirectVobShadows; - bool EnableStreamingResources; bool VegetationAlphaToCoverage; bool DisableWatermark; bool DisableRendering; diff --git a/D3D11Engine/ShaderIDs.h b/D3D11Engine/ShaderIDs.h index a2debebc..578e8e1b 100644 --- a/D3D11Engine/ShaderIDs.h +++ b/D3D11Engine/ShaderIDs.h @@ -113,5 +113,6 @@ enum class CShaderID : size_t { CS_LightCulling, CS_TiledShading, CS_CullVobs, + CS_BuildHiZ, COUNT }; diff --git a/D3D11Engine/Shaders/CS_BuildHiZ.hlsl b/D3D11Engine/Shaders/CS_BuildHiZ.hlsl new file mode 100644 index 00000000..bfdb97e4 --- /dev/null +++ b/D3D11Engine/Shaders/CS_BuildHiZ.hlsl @@ -0,0 +1,57 @@ +//-------------------------------------------------------------------------------------- +// Hi-Z Pyramid Build Compute Shader +// Builds a MAX-depth mip chain for 
hierarchical occlusion culling (reversed-Z). +// Each mip texel stores the NEAREST depth (highest reversed-Z) in its 2x2 source region. +// Mip 0: copy from depth buffer. +// Mip N>0: 2x2 MAX downsample from previous mip. +// +// D3D11 forbids binding the same resource as both SRV and UAV, so we use a +// scratch texture as the UAV target, then CopySubresourceRegion into the +// real Hi-Z texture after each dispatch. +//-------------------------------------------------------------------------------------- + +cbuffer HiZCB : register( b0 ) +{ + uint outputWidth; + uint outputHeight; + uint inputMipLevel; + uint isCopyPass; // 1 = mip 0 (copy from depth), 0 = downsample +}; + +Texture2D InputTexture : register( t0 ); +RWTexture2D OutputTexture : register( u0 ); + +[numthreads( 8, 8, 1 )] +void CSMain( uint3 DTid : SV_DispatchThreadID ) +{ + if ( DTid.x >= outputWidth || DTid.y >= outputHeight ) + return; + + if ( isCopyPass ) + { + // Mip 0: straight copy from the depth buffer (reversed-Z, so 0 = far) + OutputTexture[DTid.xy] = InputTexture.Load( int3( DTid.xy, 0 ) ); + } + else + { + // 2x2 MAX downsample from the previous mip level of the Hi-Z texture. + // With reversed-Z depth (near=1, far=0), we take the MAX to get the + // NEAREST (closest to camera) surface per tile. + // + // CS_CullVobs then takes the MIN across footprint texels of this MAX chain, + // finding the least-occluded tile in the AABB's screen projection. + // The test "maxDepth < hiZDepth" passes only when the AABB's nearest corner + // (maxDepth) is farther than the nearest occluder in every tile of the footprint. + // + // Using MIN here instead would collapse every tile touching the sky to ~0, + // making the test never fire since depth values are non-negative. 
+ uint2 srcBase = DTid.xy * 2; + + float d00 = InputTexture.Load( int3( srcBase + uint2( 0, 0 ), inputMipLevel ) ); + float d10 = InputTexture.Load( int3( srcBase + uint2( 1, 0 ), inputMipLevel ) ); + float d01 = InputTexture.Load( int3( srcBase + uint2( 0, 1 ), inputMipLevel ) ); + float d11 = InputTexture.Load( int3( srcBase + uint2( 1, 1 ), inputMipLevel ) ); + + OutputTexture[DTid.xy] = max( max( d00, d10 ), max( d01, d11 ) ); + } +} diff --git a/D3D11Engine/Shaders/CS_CullVobs.hlsl b/D3D11Engine/Shaders/CS_CullVobs.hlsl index 1ce3bbde..f335313d 100644 --- a/D3D11Engine/Shaders/CS_CullVobs.hlsl +++ b/D3D11Engine/Shaders/CS_CullVobs.hlsl @@ -14,6 +14,11 @@ cbuffer CullCB : register( b0 ) uint windAdvanced; uint numVobs; uint feedbackFrameNumber; + uint enableHiZ; + uint hiZMipCount; + float hiZWidth; + float hiZHeight; + float4x4 viewProjection; }; struct VobGPUData @@ -61,6 +66,7 @@ struct VobInstanceInfoAtlas StructuredBuffer VobBuffer : register( t0 ); StructuredBuffer SubmeshBuffer : register( t1 ); +Texture2D HiZTexture : register( t2 ); RWStructuredBuffer InstanceOutput : register( u0 ); RWByteAddressBuffer IndirectArgsUAV : register( u1 ); @@ -69,6 +75,99 @@ RWByteAddressBuffer IndirectArgsUAV : register( u1 ); // cheaper than per-pixel atomics in the pixel shader. RWTexture2D FeedbackUAV : register( u5 ); +// Hi-Z occlusion test: project AABB to screen, pick mip level, compare depth. +// Returns true if the AABB is OCCLUDED (should be culled). 
+bool IsOccludedHiZ( float3 aabbCenter, float3 aabbExtent ) +{ + // Generate all 8 corners of the AABB + float3 corners[8]; + corners[0] = aabbCenter + float3( -aabbExtent.x, -aabbExtent.y, -aabbExtent.z ); + corners[1] = aabbCenter + float3( aabbExtent.x, -aabbExtent.y, -aabbExtent.z ); + corners[2] = aabbCenter + float3( -aabbExtent.x, aabbExtent.y, -aabbExtent.z ); + corners[3] = aabbCenter + float3( aabbExtent.x, aabbExtent.y, -aabbExtent.z ); + corners[4] = aabbCenter + float3( -aabbExtent.x, -aabbExtent.y, aabbExtent.z ); + corners[5] = aabbCenter + float3( aabbExtent.x, -aabbExtent.y, aabbExtent.z ); + corners[6] = aabbCenter + float3( -aabbExtent.x, aabbExtent.y, aabbExtent.z ); + corners[7] = aabbCenter + float3( aabbExtent.x, aabbExtent.y, aabbExtent.z ); + + float minX = 1.0, minY = 1.0, maxX = 0.0, maxY = 0.0; + float maxDepth = 0.0; // Reversed-Z: nearest corner has the highest Z. Track max across corners. + + [unroll] + for ( int i = 0; i < 8; i++ ) + { + float4 clip = mul( float4( corners[i], 1.0 ), viewProjection ); + + // Behind camera — can't occlude, bail out as visible + if ( clip.w <= 0.0 ) + return false; + + float3 ndc = clip.xyz / clip.w; + + // NDC to UV [0,1] range (Y is flipped for texture space) + float u = ndc.x * 0.5 + 0.5; + float v = -ndc.y * 0.5 + 0.5; + + minX = min( minX, u ); + maxX = max( maxX, u ); + minY = min( minY, v ); + maxY = max( maxY, v ); + + // Track the nearest AABB corner (highest Z in reversed-Z) + maxDepth = max( maxDepth, ndc.z ); + } + + // Clamp to screen bounds + minX = saturate( minX ); + maxX = saturate( maxX ); + minY = saturate( minY ); + maxY = saturate( maxY ); + + // Degenerate or off-screen — treat as visible + if ( minX >= maxX || minY >= maxY ) + return false; + + // Compute screen-space size in pixels at mip 0 + float sizeX = ( maxX - minX ) * hiZWidth; + float sizeY = ( maxY - minY ) * hiZHeight; + float maxSize = max( sizeX, sizeY ); + + // Pick mip level: we want the mip where the AABB covers 
roughly 2x2 texels + float mipF = ceil( log2( max( maxSize, 1.0 ) ) ); + uint mip = min( (uint)mipF, hiZMipCount - 1 ); + + // Compute texel coordinates at this mip level + float mipWidth = max( hiZWidth / (float)( 1u << mip ), 1.0 ); + float mipHeight = max( hiZHeight / (float)( 1u << mip ), 1.0 ); + + int2 texMin = int2( minX * mipWidth, minY * mipHeight ); + int2 texMax = int2( maxX * mipWidth, maxY * mipHeight ); + + // Clamp to valid range + texMin = max( texMin, int2( 0, 0 ) ); + texMax = min( texMax, int2( (int)mipWidth - 1, (int)mipHeight - 1 ) ); + + // Sample Hi-Z: take the min depth across the covered texels. + // The Hi-Z pyramid is a MAX mip chain: each texel stores the nearest depth in its region (reversed-Z: largest Z = nearest). + // Taking the min across the footprint texels selects the least-occluded tile under the AABB. + float hiZDepth = 1.0; + for ( int y = texMin.y; y <= texMax.y; y++ ) + { + for ( int x = texMin.x; x <= texMax.x; x++ ) + { + hiZDepth = min( hiZDepth, HiZTexture.Load( int3( x, y, mip ) ) ); + } + } + + // Reversed-Z: near=1, far=0. + // maxDepth = nearest AABB corner (highest Z in reversed-Z). + // HiZ is a MAX mip chain: each texel = nearest surface (highest Z) in its region. + // We take MIN across the AABB footprint texels to find the least-occluded tile. + // AABB is occluded when its nearest corner is farther than the nearest surface + // in every footprint tile, i.e. maxDepth < min(hiZMaxValues) = hiZDepth.
+ return ( maxDepth < hiZDepth ); +} + [numthreads( 64, 1, 1 )] void CSMain( uint3 DTid : SV_DispatchThreadID ) { @@ -96,6 +195,13 @@ void CSMain( uint3 DTid : SV_DispatchThreadID ) return; // fully outside this plane } + // Hi-Z occlusion cull: test AABB against hierarchical depth buffer + if ( enableHiZ ) + { + if ( IsOccludedHiZ( vob.aabbCenter, vob.aabbExtent ) ) + return; + } + // Compute wind strength for this vob float windStr = 0.0; if ( vob.aniModeStrength > 0.0 && windAdvanced ) diff --git a/D3D11Engine/Toolbox.cpp b/D3D11Engine/Toolbox.cpp index 0b0fca05..dc504b17 100644 --- a/D3D11Engine/Toolbox.cpp +++ b/D3D11Engine/Toolbox.cpp @@ -150,6 +150,25 @@ namespace Toolbox { return _mm_cvtss_f32( _mm_rcp_ss( _mm_rsqrt_ss( _mm_set_ss( dx * dx + dz * dz ) ) ) ); } + float ComputePointAABBDistanceSq( const XMFLOAT3& p, const XMFLOAT3& min, const XMFLOAT3& max ) { + float dx = std::max( std::max( min.x - p.x, 0.0f ), p.x - max.x ); + float dy = std::max( std::max( min.y - p.y, 0.0f ), p.y - max.y ); + float dz = std::max( std::max( min.z - p.z, 0.0f ), p.z - max.z ); + + return (dx * dx) + (dy * dy) + (dz * dz); + } + + float ComputePointAABBDistanceSq( const XMFLOAT3& p, const DirectX::BoundingBox& box ) { + // 1. Get absolute distance from point to the center of the box + // 2. Subtract the box extents to get the distance to the edge + // 3. 
Clamp to 0 if the point is inside the box bounds along that axis + float dx = std::max( 0.0f, std::abs( p.x - box.Center.x ) - box.Extents.x ); + float dy = std::max( 0.0f, std::abs( p.y - box.Center.y ) - box.Extents.y ); + float dz = std::max( 0.0f, std::abs( p.z - box.Center.z ) - box.Extents.z ); + + return (dx * dx) + (dy * dy) + (dz * dz); + } + /** Computes the Normal of a triangle */ FXMVECTOR ComputeNormal( const XMFLOAT3& v0, const XMFLOAT3& v1, const XMFLOAT3& v2 ) { FXMVECTOR Normal = XMVector3Normalize( XMVector3Cross( (XMLoadFloat3( &v1 ) - XMLoadFloat3( &v0 )), (XMLoadFloat3( &v2 ) - XMLoadFloat3( &v0 )) ) ); diff --git a/D3D11Engine/Toolbox.h b/D3D11Engine/Toolbox.h index 371dc8b7..2b06c1c6 100644 --- a/D3D11Engine/Toolbox.h +++ b/D3D11Engine/Toolbox.h @@ -6,6 +6,7 @@ #include #include "Types.h" +#include /** Misc. tools */ enum zTCam_ClipType; @@ -171,6 +172,10 @@ namespace Toolbox { /** Computes the distance of a point to an AABB */ float ComputePointAABBDistance( const XMFLOAT3& p, const XMFLOAT3& min, const XMFLOAT3& max ); + float ComputePointAABBDistanceSq(const XMFLOAT3& p, const XMFLOAT3& min, const XMFLOAT3& max); + + float ComputePointAABBDistanceSq(const XMFLOAT3& p, const DirectX::BoundingBox& box); + /** Returns whether the given file exists */ bool FileExists( const std::string& file ); From 3684605d67fbbee35ed07fd5a3b917d4d266d120 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Mon, 9 Mar 2026 11:20:39 +0100 Subject: [PATCH 05/42] fix BC alignment issue --- D3D11Engine/D3D11TextureAtlasManager.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/D3D11Engine/D3D11TextureAtlasManager.h b/D3D11Engine/D3D11TextureAtlasManager.h index 832e7eda..660294a4 100644 --- a/D3D11Engine/D3D11TextureAtlasManager.h +++ b/D3D11Engine/D3D11TextureAtlasManager.h @@ -96,9 +96,12 @@ class TextureManager { const DirectX::Image* src = finalChain->GetImage( chainIdx, 0, 0 ); if ( !src || 
!src->pixels ) continue; + // BC formats require texture dimensions to be multiples of the block size (4). + // Small mips can be sub-block, so align up to avoid CREATETEXTURE2D_INVALIDDIMENSIONS. + UINT bsz = GetBlockSize( atlasFormat ); D3D11_TEXTURE2D_DESC tmpDesc = {}; - tmpDesc.Width = (UINT)src->width; - tmpDesc.Height = (UINT)src->height; + tmpDesc.Width = Align( (UINT)src->width, bsz ); + tmpDesc.Height = Align( (UINT)src->height, bsz ); tmpDesc.MipLevels = 1; tmpDesc.ArraySize = 1; tmpDesc.Format = src->format; @@ -115,7 +118,7 @@ class TextureManager { UINT mipX = item.x >> mip; UINT mipY = item.y >> mip; UINT dstSub = D3D11CalcSubresource( mip, item.slice, mipLevels ); - D3D11_BOX box = { 0, 0, 0, (UINT)src->width, (UINT)src->height, 1 }; + D3D11_BOX box = { 0, 0, 0, tmpDesc.Width, tmpDesc.Height, 1 }; context->CopySubresourceRegion( atlasTextureArray, dstSub, mipX, mipY, 0, tmpTex, 0, &box ); tmpTex->Release(); } From 7c1213164b220d79405bd29136bca79e1a7faab9 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Mon, 9 Mar 2026 14:07:07 +0100 Subject: [PATCH 06/42] add Atlas support for world mesh drawing --- D3D11Engine/ConstantBufferStructs.h | 16 + D3D11Engine/D3D11GraphicsEngine.cpp | 516 +++++++++++++++++++++-- D3D11Engine/D3D11GraphicsEngine.h | 21 + D3D11Engine/D3D11IndirectBuffer.cpp | 5 +- D3D11Engine/D3D11ShaderManager.cpp | 14 +- D3D11Engine/ShaderIDs.h | 2 + D3D11Engine/Shaders/PS_WorldAtlas.hlsl | 165 ++++++++ D3D11Engine/Shaders/Toolbox.h | 12 + D3D11Engine/Shaders/VS_ExWorldAtlas.hlsl | 98 +++++ 9 files changed, 817 insertions(+), 32 deletions(-) create mode 100644 D3D11Engine/Shaders/PS_WorldAtlas.hlsl create mode 100644 D3D11Engine/Shaders/VS_ExWorldAtlas.hlsl diff --git a/D3D11Engine/ConstantBufferStructs.h b/D3D11Engine/ConstantBufferStructs.h index b7167686..0bf452fa 100644 --- a/D3D11Engine/ConstantBufferStructs.h +++ b/D3D11Engine/ConstantBufferStructs.h @@ -73,6 +73,22 @@ struct SubmeshGPUData { 
UINT globalSourceIndex; // global source index into feedback texture }; +// Per-submesh data for the world mesh atlas indirect draw path. +// Read by VS_ExWorldAtlas via StructuredBuffer. +struct WorldMeshSubmeshGPUData { + // Diffuse atlas + int diffuseSlice; + float dUStart, dVStart, dUEnd, dVEnd; + // Normal atlas + int normalSlice; + float nUStart, nVStart, nUEnd, nVEnd; + // FX atlas + int fxSlice; + float fUStart, fVStart, fUEnd, fVEnd; + // Flags: 1 = HAS_NORMAL, 2 = HAS_FX, 4 = ALPHA_TEST + UINT flags; +}; + // Constant buffer for the GPU cull compute shader struct CullConstants { XMFLOAT4 frustumPlanes[6]; diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index e624e5bb..a0b26841 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -3926,13 +3926,6 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh_Indirect( bool noTextures ) { if ( !Engine::GAPI->GetRendererState().RendererSettings.DrawWorldMesh ) return XR_SUCCESS; - struct MDI_DrawArgs - { - unsigned int DrawCount; - unsigned int AlignedByteOffsetForArgs; - MaterialInfo* MeshMaterialInfo; - }; - // Setup default renderstates SetDefaultStates(); @@ -3940,6 +3933,16 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh_Indirect( bool noTextures ) { Engine::GAPI->SetViewTransformXM( view ); Engine::GAPI->ResetWorldTransform(); + // Draw atlas path first (handles opaque + alpha-test submeshes that were atlased) + DrawWorldMesh_Atlas(); + + struct MDI_DrawArgs + { + unsigned int DrawCount; + unsigned int AlignedByteOffsetForArgs; + MaterialInfo* MeshMaterialInfo; + }; + SetActivePixelShader( PShaderID::PS_Diffuse ); SetActiveVertexShader( VShaderID::VS_Ex ); @@ -3989,6 +3992,10 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh_Indirect( bool noTextures ) { for ( auto const& renderItem : renderList ) { for ( auto const& worldMesh : renderItem->WorldMeshes ) { + // Skip submeshes already drawn by the atlas path + if ( 
m_WorldMeshAtlasedSubmeshes.count( worldMesh.second ) ) + continue; + zCTexture* aniTex = worldMesh.first.Material->GetTexture(); if ( !aniTex ) continue; @@ -4199,6 +4206,8 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh( bool noTextures ) { Engine::GAPI->SetViewTransformXM( view ); Engine::GAPI->ResetWorldTransform(); + DrawWorldMesh_Atlas(); + SetActivePixelShader( PShaderID::PS_Diffuse ); SetActiveVertexShader( VShaderID::VS_Ex ); @@ -4244,6 +4253,10 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh( bool noTextures ) { for ( auto const& renderItem : renderList ) { for ( auto const& worldMesh : renderItem->WorldMeshes ) { + // Skip submeshes already drawn by the atlas path + if ( m_WorldMeshAtlasedSubmeshes.count( worldMesh.second ) ) + continue; + if ( worldMesh.first.Material ) { zCTexture* aniTex = worldMesh.first.Material->GetTexture(); if ( !aniTex ) continue; @@ -5120,10 +5133,6 @@ void D3D11GraphicsEngine::ShadowPass_DrawWorldMesh_Indirect(const std::vectorGetRendererState().GraphicsState.FF_AlphaRef; bool linearDepth = (Engine::GAPI->GetRendererState().GraphicsState.FF_GSwitches & GSWITCH_LINEAR_DEPTH) != 0; - - auto drawMultiIndexedInstancedIndirect = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.UseMDI - ? 
DrawMultiIndexedInstancedIndirect - : Stub_DrawMultiIndexedInstancedIndirect; if ( Engine::GAPI->GetRendererState().RendererSettings.FastShadows ) { @@ -5198,7 +5207,7 @@ void D3D11GraphicsEngine::ShadowPass_DrawWorldMesh_Indirect(const std::vector(opaqueDrawArgs.size()), WorldMeshIndirectBuffer->GetIndirectBuffer().Get(), 0, @@ -6402,23 +6411,14 @@ XRESULT D3D11GraphicsEngine::DrawVOBsIndirect( const Frustum& frustum, bool bind context->PSSetShader( nullptr, nullptr, 0 ); } - if ( DrawMultiIndexedInstancedIndirect ) { - // Vendor multi-draw-indirect: all submeshes in this group in one API call - DrawMultiIndexedInstancedIndirect( - context.Get(), - group.mergedArgsCount, - m_MergedIndirectArgs->GetIndirectBuffer().Get(), - group.mergedArgsOffset, - sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ) ); - } else { - // Fallback: one DrawIndexedInstancedIndirect per submesh - // InstanceCount is GPU-written, so zero-instance draws are no-ops on GPU - for ( UINT i = 0; i < group.mergedArgsCount; i++ ) { - context->DrawIndexedInstancedIndirect( - m_MergedIndirectArgs->GetIndirectBuffer().Get(), - group.mergedArgsOffset + i * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ) ); - } - } + // DrawMultiIndexedInstancedIndirect falls back to individual DrawIndexedInstancedIndirect + // calls via Stub_DrawMultiIndexedInstancedIndirect if hardware doesn't support MDI + DrawMultiIndexedInstancedIndirect( + context.Get(), + group.mergedArgsCount, + m_MergedIndirectArgs->GetIndirectBuffer().Get(), + group.mergedArgsOffset, + sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ) ); } // Unbind instance buffer @@ -8703,6 +8703,459 @@ void D3D11GraphicsEngine::BuildSceneTextureAtlasses() { BuildGPUCullingBuffers(); } +void D3D11GraphicsEngine::BuildWorldMeshTextureAtlasses() { + for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ ) { + m_WorldMeshDiffuseAtlasses[(DXGI_FORMAT)i].Destroy(); + m_WorldMeshNormalAtlasses[(DXGI_FORMAT)i].Destroy(); + 
m_WorldMeshFxAtlasses[(DXGI_FORMAT)i].Destroy(); + } + m_WorldMeshDiffuseAtlasLookup.clear(); + m_WorldMeshNormalAtlasLookup.clear(); + m_WorldMeshFxAtlasLookup.clear(); + m_WorldMeshAtlasDrawGroups.clear(); + m_WorldMeshAtlasedSubmeshes.clear(); + m_WorldMeshGlobalVertexBuffer.reset(); + m_WorldMeshGlobalIndexBuffer.reset(); + m_WorldMeshGlobalInstanceIdBuffer.reset(); + m_WorldMeshSubmeshBuffer.reset(); + + if ( !SupportTextureAtlases ) { + return; + } + + // --- 1. Collect unique diffuse textures from world mesh --- + struct DiffuseTextureInfo { + zCTexture* gothicTexture; + DXGI_FORMAT Format; + Microsoft::WRL::ComPtr Texture2D; + }; + std::unordered_set seenDiffuse; + std::vector uniqueDiffuse; + + struct AuxTextureInfo { + D3D11Texture* engineTexture; + DXGI_FORMAT Format; + Microsoft::WRL::ComPtr Texture2D; + }; + std::unordered_set seenNormal, seenFx; + std::vector uniqueNormals, uniqueFx; + + auto& worldSections = Engine::GAPI->GetWorldSections(); + for ( auto& [x, row] : worldSections ) { + for ( auto& [y, section] : row ) { + for ( auto const& [meshKey, worldMeshInfo] : section.WorldMeshes ) { + if ( !meshKey.Material ) continue; + + // Skip animated textures + zCTexture* baseTex = meshKey.Material->GetTextureSingle(); + if ( !baseTex ) continue; + unsigned char texFlags = *reinterpret_cast( + reinterpret_cast(baseTex) + GothicMemoryLocations::zCTexture::Offset_Flags ); + if ( texFlags & GothicMemoryLocations::zCTexture::Mask_FlagIsAnimated ) + continue; + + // Skip alpha-blended (only opaque + alpha-test) + int alphaFunc = meshKey.Material->GetAlphaFunc(); + if ( alphaFunc > zMAT_ALPHA_FUNC_NONE && alphaFunc != zMAT_ALPHA_FUNC_TEST ) + continue; + + // Skip non-standard materials (water, portals, etc.) 
+ if ( meshKey.Info && meshKey.Info->MaterialType != MaterialInfo::MT_None ) + continue; + + zCTexture* tex = baseTex; + auto cachedState = tex->CacheIn( -1 ); + if ( cachedState != zRES_CACHED_IN ) continue; + + auto surface = tex->GetSurface(); + if ( !surface || !surface->IsSurfaceReady() ) continue; + + auto engineTex = surface->GetEngineTexture(); + if ( !engineTex ) continue; + + // Diffuse + if ( seenDiffuse.insert( tex ).second ) { + D3D11_TEXTURE2D_DESC desc; + engineTex->GetTextureObject()->GetDesc( &desc ); + if ( desc.Format >= 1 && desc.Format < TEXTURE_ATLAS_MAX ) { + uniqueDiffuse.push_back( { tex, desc.Format, engineTex->GetTextureObject() } ); + } + } + + // Normal map + D3D11Texture* normalTex = surface->GetNormalmap(); + if ( normalTex && seenNormal.insert( normalTex ).second ) { + D3D11_TEXTURE2D_DESC desc; + normalTex->GetTextureObject()->GetDesc( &desc ); + if ( desc.Format >= 1 && desc.Format < TEXTURE_ATLAS_MAX ) { + uniqueNormals.push_back( { normalTex, desc.Format, normalTex->GetTextureObject() } ); + } + } + + // FX map + D3D11Texture* fxTex = surface->GetFxMap(); + if ( fxTex && seenFx.insert( fxTex ).second ) { + D3D11_TEXTURE2D_DESC desc; + fxTex->GetTextureObject()->GetDesc( &desc ); + if ( desc.Format >= 1 && desc.Format < TEXTURE_ATLAS_MAX ) { + uniqueFx.push_back( { fxTex, desc.Format, fxTex->GetTextureObject() } ); + } + } + } + } + } + + // --- 2. 
Build atlases per format group for each texture type --- + auto buildDiffuseAtlases = [&]() { + std::sort( uniqueDiffuse.begin(), uniqueDiffuse.end(), + []( const DiffuseTextureInfo& a, const DiffuseTextureInfo& b ) { return a.Format < b.Format; } ); + + size_t rangeStart = 0; + while ( rangeStart < uniqueDiffuse.size() ) { + DXGI_FORMAT fmt = uniqueDiffuse[rangeStart].Format; + size_t rangeEnd = rangeStart; + while ( rangeEnd < uniqueDiffuse.size() && uniqueDiffuse[rangeEnd].Format == fmt ) + rangeEnd++; + + std::vector texPtrs; + texPtrs.reserve( rangeEnd - rangeStart ); + for ( size_t i = rangeStart; i < rangeEnd; i++ ) + texPtrs.push_back( uniqueDiffuse[i].Texture2D.Get() ); + + std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); + TextureManager::AtlasResult atlas = TextureManager::CreateAtlasArray( + GetDevice().Get(), GetContext().Get(), txView, 2048, 6 ); + + for ( size_t i = 0; i < texPtrs.size(); i++ ) { + m_WorldMeshDiffuseAtlasLookup[uniqueDiffuse[rangeStart + i].gothicTexture] = { + fmt, atlas.descriptors[i] + }; + } + m_WorldMeshDiffuseAtlasses[fmt] = atlas; + rangeStart = rangeEnd; + } + }; + + auto buildAuxAtlases = []( std::vector& textures, + std::unordered_map& lookup, + std::array& atlasses, + ID3D11Device* device, ID3D11DeviceContext* context ) { + std::sort( textures.begin(), textures.end(), + []( const AuxTextureInfo& a, const AuxTextureInfo& b ) { return a.Format < b.Format; } ); + + size_t rangeStart = 0; + while ( rangeStart < textures.size() ) { + DXGI_FORMAT fmt = textures[rangeStart].Format; + size_t rangeEnd = rangeStart; + while ( rangeEnd < textures.size() && textures[rangeEnd].Format == fmt ) + rangeEnd++; + + std::vector texPtrs; + texPtrs.reserve( rangeEnd - rangeStart ); + for ( size_t i = rangeStart; i < rangeEnd; i++ ) + texPtrs.push_back( textures[i].Texture2D.Get() ); + + std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); + TextureManager::AtlasResult atlas = TextureManager::CreateAtlasArray( + 
device, context, txView, 2048, 6 ); + + for ( size_t i = 0; i < texPtrs.size(); i++ ) { + lookup[textures[rangeStart + i].engineTexture] = { + fmt, atlas.descriptors[i] + }; + } + atlasses[fmt] = atlas; + rangeStart = rangeEnd; + } + }; + + buildDiffuseAtlases(); + buildAuxAtlases( uniqueNormals, m_WorldMeshNormalAtlasLookup, m_WorldMeshNormalAtlasses, + GetDevice().Get(), GetContext().Get() ); + buildAuxAtlases( uniqueFx, m_WorldMeshFxAtlasLookup, m_WorldMeshFxAtlasses, + GetDevice().Get(), GetContext().Get() ); + + LogInfo() << "World Mesh Atlas: " << uniqueDiffuse.size() << " diffuse, " + << uniqueNormals.size() << " normal, " << uniqueFx.size() << " fx textures"; + + BuildStaticWorldMeshBuffers(); +} + +void D3D11GraphicsEngine::BuildStaticWorldMeshBuffers() { + std::vector allVertices; + std::vector allIndices; + std::vector submeshGPU; + + // Group by diffuse atlas format (normal/fx may differ but we key on diffuse) + std::map groupsByFormat; + + std::unordered_set processedMeshes; + + auto& worldSections = Engine::GAPI->GetWorldSections(); + for ( auto& [x, row] : worldSections ) { + for ( auto& [y, section] : row ) { + for ( auto const& [meshKey, worldMeshInfo] : section.WorldMeshes ) { + if ( !meshKey.Material ) continue; + + zCTexture* tex = meshKey.Material->GetTextureSingle(); + auto diffIt = m_WorldMeshDiffuseAtlasLookup.find( tex ); + if ( diffIt == m_WorldMeshDiffuseAtlasLookup.end() ) + continue; // not in atlas + + MeshInfo* mi = worldMeshInfo; + if ( !processedMeshes.insert( mi ).second ) + continue; // already added + + m_WorldMeshAtlasedSubmeshes.insert( mi ); + + const TextureAtlasLookup& diffLookup = diffIt->second; + auto& group = groupsByFormat[diffLookup.atlasFormat]; + group.format = diffLookup.atlasFormat; + + UINT baseVertex = static_cast(allVertices.size()); + UINT startIndex = static_cast(allIndices.size()); + + allVertices.insert( allVertices.end(), mi->Vertices.begin(), mi->Vertices.end() ); + allIndices.insert( allIndices.end(), 
mi->Indices.begin(), mi->Indices.end() ); + + // Build GPU descriptor for this submesh + WorldMeshSubmeshGPUData gpuData = {}; + gpuData.diffuseSlice = diffLookup.descriptor.slice; + gpuData.dUStart = diffLookup.descriptor.uStart; + gpuData.dVStart = diffLookup.descriptor.vStart; + gpuData.dUEnd = diffLookup.descriptor.uEnd; + gpuData.dVEnd = diffLookup.descriptor.vEnd; + + UINT flags = 0; + + // Normal map lookup + auto surface = tex->GetSurface(); + if ( surface ) { + D3D11Texture* normalTex = surface->GetNormalmap(); + if ( normalTex ) { + auto normIt = m_WorldMeshNormalAtlasLookup.find( normalTex ); + if ( normIt != m_WorldMeshNormalAtlasLookup.end() ) { + gpuData.normalSlice = normIt->second.descriptor.slice; + gpuData.nUStart = normIt->second.descriptor.uStart; + gpuData.nVStart = normIt->second.descriptor.vStart; + gpuData.nUEnd = normIt->second.descriptor.uEnd; + gpuData.nVEnd = normIt->second.descriptor.vEnd; + flags |= 1; // HAS_NORMAL + } + } + + D3D11Texture* fxTex = surface->GetFxMap(); + if ( fxTex ) { + auto fxIt = m_WorldMeshFxAtlasLookup.find( fxTex ); + if ( fxIt != m_WorldMeshFxAtlasLookup.end() ) { + gpuData.fxSlice = fxIt->second.descriptor.slice; + gpuData.fUStart = fxIt->second.descriptor.uStart; + gpuData.fVStart = fxIt->second.descriptor.vStart; + gpuData.fUEnd = fxIt->second.descriptor.uEnd; + gpuData.fVEnd = fxIt->second.descriptor.vEnd; + flags |= 2; // HAS_FX + } + } + } + + // Alpha test flag + int alphaFunc = meshKey.Material->GetAlphaFunc(); + if ( alphaFunc == zMAT_ALPHA_FUNC_TEST || tex->HasAlphaChannel() ) { + flags |= 4; // ALPHA_TEST + } + + gpuData.flags = flags; + + UINT submeshIndex = static_cast(submeshGPU.size()); + submeshGPU.push_back( gpuData ); + + // Indirect draw arg — InstanceCount=1 (world mesh is not instanced), + // StartInstanceLocation = submeshIndex (used as VS instance remap) + D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS args = {}; + args.IndexCountPerInstance = static_cast(mi->Indices.size()); + 
args.InstanceCount = 1; + args.StartIndexLocation = startIndex; + args.BaseVertexLocation = static_cast(baseVertex); + args.StartInstanceLocation = submeshIndex; + group.indirectArgs.push_back( args ); + } + } + } + + if ( allVertices.empty() ) { + LogWarn() << "BuildStaticWorldMeshBuffers: No world mesh vertices for atlas"; + return; + } + + // Create global vertex buffer (IMMUTABLE) + m_WorldMeshGlobalVertexBuffer = std::make_unique(); + m_WorldMeshGlobalVertexBuffer->Init( + allVertices.data(), + static_cast(allVertices.size() * sizeof( ExVertexStruct )), + D3D11VertexBuffer::B_VERTEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + // Create global index buffer (IMMUTABLE) + m_WorldMeshGlobalIndexBuffer = std::make_unique(); + m_WorldMeshGlobalIndexBuffer->Init( + allIndices.data(), + static_cast(allIndices.size() * sizeof( VERTEX_INDEX )), + D3D11VertexBuffer::B_INDEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + // Instance ID buffer: just {0,1,2,...} so the VS can read submeshIdx + // For world mesh, each submesh draws exactly 1 instance, + // and StartInstanceLocation = submeshIndex in the indirect args + UINT maxIds = static_cast(submeshGPU.size()); + if ( maxIds < 256 ) maxIds = 256; + std::vector instanceIds( maxIds ); + for ( uint32_t i = 0; i < maxIds; i++ ) + instanceIds[i] = i; + + m_WorldMeshGlobalInstanceIdBuffer = std::make_unique(); + m_WorldMeshGlobalInstanceIdBuffer->Init( + instanceIds.data(), + static_cast(instanceIds.size() * sizeof( uint32_t )), + D3D11VertexBuffer::B_VERTEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + // Structured buffer for submesh GPU data + auto* device = GetDevice().Get(); + auto* context = GetContext().Get(); + m_WorldMeshSubmeshBuffer = std::make_unique>(); + m_WorldMeshSubmeshBuffer->Init( device, static_cast(submeshGPU.size()), false, false ); + m_WorldMeshSubmeshBuffer->UpdateBufferDefault( context, submeshGPU.data(), 
static_cast(submeshGPU.size()) );
+
+    // Move groups and create indirect buffers
+    m_WorldMeshAtlasDrawGroups.clear();
+    for ( auto& [fmt, group] : groupsByFormat ) {
+        if ( group.indirectArgs.empty() )
+            continue;
+
+        UINT bufSize = static_cast(group.indirectArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ));
+        group.indirectBuffer = std::make_unique();
+        group.indirectBuffer->Init(
+            group.indirectArgs.data(), bufSize,
+            D3D11IndirectBuffer::B_VERTEXBUFFER,
+            D3D11IndirectBuffer::U_IMMUTABLE,
+            D3D11IndirectBuffer::CA_NONE );
+
+        m_WorldMeshAtlasDrawGroups.push_back( std::move( group ) );
+    }
+
+    LogInfo() << "World Mesh Atlas geometry: " << allVertices.size() << " vertices, "
+        << allIndices.size() << " indices, "
+        << m_WorldMeshAtlasDrawGroups.size() << " format groups, "
+        << submeshGPU.size() << " submeshes";
+}
+
+/** Draws the merged world mesh through the texture-atlas indirect path.
+ *  Global geometry, the per-submesh StructuredBuffer and both shaders are bound
+ *  once; afterwards one multi-draw-indirect call is issued per diffuse format group.
+ *  Returns XR_SUCCESS, also when the atlas path has nothing to draw. */
+XRESULT D3D11GraphicsEngine::DrawWorldMesh_Atlas() {
+    // Bail out unless every resource dereferenced below exists. The instance-id
+    // and submesh buffers are bound unconditionally further down, so they belong
+    // in this guard just like the vertex/index buffers.
+    if ( m_WorldMeshAtlasDrawGroups.empty()
+        || !m_WorldMeshGlobalVertexBuffer
+        || !m_WorldMeshGlobalIndexBuffer
+        || !m_WorldMeshGlobalInstanceIdBuffer
+        || !m_WorldMeshSubmeshBuffer )
+        return XR_SUCCESS;
+
+    auto _ = RecordGraphicsEvent( L"DrawWorldMesh_Atlas" );
+    auto& context = GetContext();
+
+    // Reset render states to opaque defaults (depth write on, no blending, etc.)
+    SetDefaultStates();
+
+    XMMATRIX view = Engine::GAPI->GetViewMatrixXM();
+    Engine::GAPI->SetViewTransformXM( view );
+    Engine::GAPI->ResetWorldTransform();
+
+    // This path uses no tessellation stages
+    context->DSSetShader( nullptr, nullptr, 0 );
+    context->HSSetShader( nullptr, nullptr, 0 );
+
+    // --- 1. Bind global geometry (once) ---
+    // Slot 0: merged vertices, slot 1: instance-id stream ({0,1,2,...})
+    UINT strides[2] = { sizeof( ExVertexStruct ), sizeof( uint32_t ) };
+    UINT offsets[2] = { 0, 0 };
+    ID3D11Buffer* vbs[2] = {
+        m_WorldMeshGlobalVertexBuffer->GetVertexBuffer().Get(),
+        m_WorldMeshGlobalInstanceIdBuffer->GetVertexBuffer().Get()
+    };
+    context->IASetVertexBuffers( 0, 2, vbs, strides, offsets );
+    context->IASetIndexBuffer( m_WorldMeshGlobalIndexBuffer->GetVertexBuffer().Get(), VERTEX_INDEX_DXGI_FORMAT, 0 );
+    context->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST );
+
+    // --- 2. Bind submesh StructuredBuffer to VS t1 ---
+    ID3D11ShaderResourceView* submeshSRV = m_WorldMeshSubmeshBuffer->GetSRV();
+    context->VSSetShaderResources( 1, 1, &submeshSRV );
+
+    // --- 3. Set vertex shader ---
+    SetActiveVertexShader( VShaderID::VS_ExWorldAtlas );
+    SetupVS_ExMeshDrawCall();
+    SetupVS_ExConstantBuffer();
+
+    // World mesh is already in world space — pass identity world matrix
+    /* -- Atlas shader doesn't use this
+    ActiveVS->GetConstantBuffer()[1]->UpdateBuffer( &XMMatrixIdentity() );
+    ActiveVS->GetConstantBuffer()[1]->BindToVertexShader( 1 );
+    */
+    ActiveVS->Apply();
+
+    // --- 4. Set pixel shader + constant buffers ---
+    SetActivePixelShader( PShaderID::PS_WorldAtlas );
+
+    ActivePS->GetConstantBuffer()[0]->UpdateBuffer(
+        &Engine::GAPI->GetRendererState().GraphicsState );
+    ActivePS->GetConstantBuffer()[0]->BindToPixelShader( 0 );
+
+    GSky* sky = Engine::GAPI->GetSky();
+    ActivePS->GetConstantBuffer()[1]->UpdateBuffer( &sky->GetAtmosphereCB() );
+    ActivePS->GetConstantBuffer()[1]->BindToPixelShader( 1 );
+
+    // Per-submesh material data is not available on this path; a default material is used
+    MaterialInfo defMaterial{};
+    ActivePS->GetConstantBuffer()[2]->UpdateBuffer( &defMaterial.buffer );
+    ActivePS->GetConstantBuffer()[2]->BindToPixelShader( 2 );
+
+    InfiniteRangeConstantBuffer->BindToPixelShader( 3 );
+
+    // Bind reflection cube
+    context->PSSetShaderResources( 4, 1, ReflectionCube.GetAddressOf() );
+
+    ActivePS->Apply();
+
+    // Normal/FX atlases may be a different format than the diffuse group, so the
+    // first non-null atlas of each type is bound. The result does not depend on
+    // the group, therefore it is resolved once here instead of once per group.
+    // NOTE(review): if normal/FX textures end up in more than one format atlas,
+    // only the first one is ever sampled — confirm against the atlas builder.
+    ID3D11ShaderResourceView* normalSRV = nullptr;
+    ID3D11ShaderResourceView* fxSRV = nullptr;
+    for ( size_t i = 0; i < TEXTURE_ATLAS_MAX && ( !normalSRV || !fxSRV ); i++ ) {
+        if ( !normalSRV && m_WorldMeshNormalAtlasses[i].atlasSRV )
+            normalSRV = m_WorldMeshNormalAtlasses[i].atlasSRV;
+        if ( !fxSRV && m_WorldMeshFxAtlasses[i].atlasSRV )
+            fxSRV = m_WorldMeshFxAtlasses[i].atlasSRV;
+    }
+
+    // --- 5. Draw per format group ---
+    for ( auto& group : m_WorldMeshAtlasDrawGroups ) {
+        // Diffuse atlas of this format group -> PS t0; skip groups without an atlas
+        ID3D11ShaderResourceView* diffuseSRV = m_WorldMeshDiffuseAtlasses[group.format].atlasSRV;
+        if ( !diffuseSRV ) continue;
+
+        // t0 = diffuse, t1 = normal, t2 = FX
+        ID3D11ShaderResourceView* psSRVs[3] = { diffuseSRV, normalSRV, fxSRV };
+        context->PSSetShaderResources( 0, 3, psSRVs );
+
+        // DrawMultiIndexedInstancedIndirect falls back to individual DrawIndexedInstancedIndirect
+        // calls via Stub_DrawMultiIndexedInstancedIndirect if hardware doesn't support MDI
+        DrawMultiIndexedInstancedIndirect(
+            context.Get(),
+            static_cast<UINT>(group.indirectArgs.size()),
+            group.indirectBuffer->GetIndirectBuffer().Get(),
+            0,
+            sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ) );
+    }
+
+    // Unbind the submesh buffer from VS t1
+    ID3D11ShaderResourceView* nullSRV = nullptr;
+    context->VSSetShaderResources( 1, 1, &nullSRV );
+
+    return XR_SUCCESS;
+}
+
 void D3D11GraphicsEngine::CacheWorldStaticVobs() {
     static std::vector _1;
@@ -8769,7 +9222,10 @@ void D3D11GraphicsEngine::OnWorldLoaded()
     CacheWorldStaticVobs();
 
     // --- Atlas building: collect unique textures, create Texture2DArray atlases, map descriptors ---
-    BuildSceneTextureAtlasses();
+    BuildSceneTextureAtlasses();
+
+    // --- World mesh atlas: collect textures, build atlases, merge geometry ---
+    BuildWorldMeshTextureAtlasses();
 }
 
 void D3D11GraphicsEngine::StoreVobPreviousTransforms() {
diff --git 
a/D3D11Engine/D3D11GraphicsEngine.h b/D3D11Engine/D3D11GraphicsEngine.h
index bb44d63e..0e216570 100644
--- a/D3D11Engine/D3D11GraphicsEngine.h
+++ b/D3D11Engine/D3D11GraphicsEngine.h
@@ -397,6 +397,13 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase {
 
     void BuildSceneTextureAtlasses();
 
+    /** World mesh atlas: collect textures, build atlases, merge geometry */
+    void BuildWorldMeshTextureAtlasses();
+    void BuildStaticWorldMeshBuffers();
+
+    /** Draw world mesh using atlas indirect path */
+    XRESULT DrawWorldMesh_Atlas();
+
     void CacheWorldStaticVobs();
 
     std::unique_ptr m_FrameLimiter;
@@ -536,6 +543,20 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase {
     std::vector m_MergedArgsReset; // CPU-side template for reset
     UINT m_TotalMaxInstances = 0;
 
+    /** World mesh atlas indirect draw path */
+    std::unordered_map m_WorldMeshDiffuseAtlasLookup;
+    std::unordered_map m_WorldMeshNormalAtlasLookup;
+    std::unordered_map m_WorldMeshFxAtlasLookup;
+    std::array m_WorldMeshDiffuseAtlasses{};
+    std::array m_WorldMeshNormalAtlasses{};
+    std::array m_WorldMeshFxAtlasses{};
+    std::unique_ptr m_WorldMeshGlobalVertexBuffer;
+    std::unique_ptr m_WorldMeshGlobalIndexBuffer;
+    std::unique_ptr m_WorldMeshGlobalInstanceIdBuffer;
+    std::unique_ptr> m_WorldMeshSubmeshBuffer;
+    std::vector m_WorldMeshAtlasDrawGroups;
+    std::unordered_set m_WorldMeshAtlasedSubmeshes; // submeshes in the atlas (for legacy filter)
+
     /** Hi-Z occlusion culling resources */
     Microsoft::WRL::ComPtr m_HiZTexture; // Full mip-chain, SRV-only
     Microsoft::WRL::ComPtr m_HiZSRV;
diff --git a/D3D11Engine/D3D11IndirectBuffer.cpp b/D3D11Engine/D3D11IndirectBuffer.cpp
index c1a20c69..a2ef7eb5 100644
--- a/D3D11Engine/D3D11IndirectBuffer.cpp
+++ b/D3D11Engine/D3D11IndirectBuffer.cpp
@@ -83,10 +83,13 @@ XRESULT D3D11IndirectBuffer::UpdateBuffer( void* data, UINT size ) {
     if ( SizeInBytes < size ) {
         size = SizeInBytes;
     }
+    if ( !data ) {
+        return XR_SUCCESS;
+    }
     // Assume null-copy?
     if ( XR_SUCCESS == Map( EMapFlags::M_WRITE_DISCARD, &mappedData, &bsize ) ) {
         if ( size ) {
-            bsize = size;
+            bsize = std::min( bsize, size );
         }
         // Copy data
         memcpy( mappedData, data, bsize );
diff --git a/D3D11Engine/D3D11ShaderManager.cpp b/D3D11Engine/D3D11ShaderManager.cpp
index 4a9bb997..7eff6017 100644
--- a/D3D11Engine/D3D11ShaderManager.cpp
+++ b/D3D11Engine/D3D11ShaderManager.cpp
@@ -245,7 +245,7 @@ XRESULT D3D11ShaderManager::Init() {
     Shaders.push_back( ShaderInfo::make( "VS_ExInstancedObj.hlsl" )
         .with_layout( 10 ) );
 
-    Shaders.push_back( ShaderInfo::make("VS_ExInstancedObjIndirectAtlas.hlsl", 12 )
+    Shaders.push_back( ShaderInfo::make("VS_ExInstancedObjIndirectAtlas.hlsl" )
         .with_layout( 12 )
         .with_cbuffer( sizeof( VS_ExConstantBuffer_PerFrame ) )
         .with_cbuffer( sizeof( VS_ExConstantBuffer_Wind ) ) );
@@ -253,6 +253,10 @@ XRESULT D3D11ShaderManager::Init() {
     Shaders.push_back( ShaderInfo::make( "VS_ExInstanced.hlsl" )
         .with_layout( 4 ) );
 
+    // World mesh atlas vertex shader (uses same layout 12: ExVertexStruct + uint instance remap)
+    Shaders.push_back( ShaderInfo::make( "VS_ExWorldAtlas.hlsl" )
+        .with_layout( 12 ) );
+
     Shaders.push_back( ShaderInfo::make( "VS_GrassInstanced.hlsl" )
         .with_layout( 9 ) );
 
@@ -387,6 +391,14 @@ XRESULT D3D11ShaderManager::Init() {
         .with_cbuffer( sizeof( MaterialInfo::Buffer ) )
         .with_cbuffer( sizeof( float4 ) ) ); // DIST_Distance
 
+    // World mesh atlas PS — flags-driven normal/FX/alpha-test in a single shader
+    makros.clear();
+    Shaders.push_back( ShaderInfo::make( "PS_WorldAtlas.hlsl" )
+        .with_cbuffer( sizeof( GothicGraphicsState ) )
+        .with_cbuffer( sizeof( AtmosphereConstantBuffer ) )
+        .with_cbuffer( sizeof( MaterialInfo::Buffer ) )
+        .with_cbuffer( sizeof( float4 ) ) ); // DIST_Distance
+
     makros.clear();
 
     m.Name = "APPLY_RAIN_EFFECTS";
diff --git a/D3D11Engine/ShaderIDs.h b/D3D11Engine/ShaderIDs.h
index 578e8e1b..dd3fe00e 100644
--- a/D3D11Engine/ShaderIDs.h
+++ b/D3D11Engine/ShaderIDs.h
@@ -30,6 +30,7 @@ enum class 
VShaderID : size_t {
     VS_ExCube,
     VS_ExNodeCube,
     VS_ExSkeletalCube,
+    VS_ExWorldAtlas,
 
     COUNT
 };
@@ -41,6 +42,7 @@ enum class PShaderID : size_t {
     PS_Rain_Snow,
     PS_Transparency,
     PS_World,
+    PS_WorldAtlas,
     PS_Water,
     PS_ParticleDistortion,
     PS_PFX_ApplyParticleDistortion,
diff --git a/D3D11Engine/Shaders/PS_WorldAtlas.hlsl b/D3D11Engine/Shaders/PS_WorldAtlas.hlsl
new file mode 100644
index 00000000..4845eea0
--- /dev/null
+++ b/D3D11Engine/Shaders/PS_WorldAtlas.hlsl
@@ -0,0 +1,165 @@
+//--------------------------------------------------------------------------------------
+// World mesh pixel shader for atlas indirect draw path
+// Samples diffuse, normal and FX maps from separate Texture2DArray atlases.
+// Flags bits: 1 = HAS_NORMAL, 2 = HAS_FX, 4 = ALPHA_TEST
+//--------------------------------------------------------------------------------------
+#include
+#include
+#include
+#include
+
+cbuffer MI_MaterialInfo : register( b2 )
+{
+    float MI_SpecularIntensity;
+    float MI_SpecularPower;
+    float MI_NormalmapStrength;
+    float MI_ParallaxOcclusionStrength;
+
+    float4 MI_Color;
+}
+
+cbuffer DIST_Distance : register( b3 )
+{
+    float DIST_DrawDistance;
+    float3 DIST_Pad;
+}
+
+//--------------------------------------------------------------------------------------
+// Textures and Samplers
+//--------------------------------------------------------------------------------------
+SamplerState SS_Linear : register( s0 );
+SamplerState SS_samMirror : register( s1 );
+Texture2DArray TX_AtlasDiffuse : register( t0 );
+Texture2DArray TX_AtlasNormal : register( t1 );
+Texture2DArray TX_AtlasFx : register( t2 );
+TextureCube TX_ReflectionCube : register( t4 );
+
+//--------------------------------------------------------------------------------------
+// Input / Output structures
+//--------------------------------------------------------------------------------------
+struct PS_INPUT
+{
+    float3 vTexcoord3D : TEXCOORD0; // (rawU, rawV, diffuseSlice)
+    float2 vTexcoord2 : TEXCOORD1;
+    float4 vDiffuse : TEXCOORD2;
+    float4 vAtlasRect : TEXCOORD3; // diffuse atlas rect
+    float3 vNormalVS : TEXCOORD4;
+    float3 vViewPosition : TEXCOORD5;
+    float4 vCurrClipPos : TEXCOORD6;
+    float4 vPrevClipPos : TEXCOORD7;
+    float3 vNormalAtlas3D : TEXCOORD8; // (rawU, rawV, normalSlice)
+    float4 vNormalAtlasRect : TEXCOORD9; // normal atlas rect
+    float3 vFxAtlas3D : TEXCOORD10; // (rawU, rawV, fxSlice)
+    nointerpolation uint vFlags : TEXCOORD11;
+    float4 vFxAtlasRect : TEXCOORD12; // fx atlas rect
+    float4 vPosition : SV_POSITION;
+};
+
+// Calculate screen-space velocity (previous UV minus current UV) from clip positions.
+// Returns zero when either w is zero, so the perspective divide is never done by zero.
+float2 CalculateVelocity(float4 currClipPos, float4 prevClipPos)
+{
+    if (currClipPos.w == 0.0 || prevClipPos.w == 0.0)
+        return float2(0, 0);
+
+    float2 currNDC = currClipPos.xy / currClipPos.w;
+    float2 prevNDC = prevClipPos.xy / prevClipPos.w;
+
+    // NDC [-1,1] -> UV [0,1], with V flipped
+    float2 currUV = float2(currNDC.x * 0.5 + 0.5, 1.0 - (currNDC.y * 0.5 + 0.5));
+    float2 prevUV = float2(prevNDC.x * 0.5 + 0.5, 1.0 - (prevNDC.y * 0.5 + 0.5));
+
+    return prevUV - currUV;
+}
+
+// Helper: sample from an atlas Texture2DArray with correct mip via SampleGrad + frac()
+// Clamps the final atlas UV inside the entry boundary, scaled by the mip level
+// so that at higher mips the border grows to prevent bilinear bleed into neighbors.
+//   rawUVSlice: (rawU, rawV, slice) — raw UVs may tile, frac() wraps them into the entry
+//   atlasRect:  (uStart, vStart, uEnd, vEnd) of the entry inside its slice
+static const float ATLAS_SIZE = 2048.0;
+
+float4 SampleAtlas(Texture2DArray atlas, SamplerState ss, float3 rawUVSlice, float4 atlasRect)
+{
+    float2 rawUV = rawUVSlice.xy;
+    float slice = rawUVSlice.z;
+    float2 scale = atlasRect.zw - atlasRect.xy;
+    float2 gradX = ddx(rawUV) * scale;
+    float2 gradY = ddy(rawUV) * scale;
+
+    // Compute approximate mip level from gradients
+    float2 dxTex = gradX * ATLAS_SIZE;
+    float2 dyTex = gradY * ATLAS_SIZE;
+    float maxSq = max(dot(dxTex, dxTex), dot(dyTex, dyTex));
+    float mipLevel = max(0.0, 0.5 * log2(maxSq));
+
+    // Scale the half-texel border by 2^mip so it covers the filter footprint at that level.
+    // Cap it at half the entry size: without the cap the clamp bounds below cross over
+    // at high mips and the clamped UV can end up outside the entry.
+    float border = (0.5 / ATLAS_SIZE) * exp2(ceil(mipLevel));
+    border = min(border, 0.5 * min(abs(scale.x), abs(scale.y)));
+
+    float2 atlasUV = atlasRect.xy + frac(rawUV) * scale;
+    atlasUV = clamp(atlasUV, atlasRect.xy + border, atlasRect.zw - border);
+    return atlas.SampleGrad(ss, float3(atlasUV, slice), gradX, gradY);
+}
+
+//--------------------------------------------------------------------------------------
+// Pixel Shader
+//--------------------------------------------------------------------------------------
+DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET
+{
+    DEFERRED_PS_OUTPUT output;
+    output.vReactiveMask = 0.0f;
+
+    // --- Diffuse ---
+    float4 color = SampleAtlas(TX_AtlasDiffuse, SS_Linear, Input.vTexcoord3D, Input.vAtlasRect);
+
+    // Alpha test
+    if (Input.vFlags & 4u)
+    {
+        ClipDistanceEffect(length(Input.vViewPosition), DIST_DrawDistance, color.r * 2 - 1, 500.0f);
+        DoAlphaTest(color.a);
+        output.vReactiveMask = 0.1f;
+    }
+
+    // --- Normal mapping ---
+    float3 nrm;
+    if (Input.vFlags & 1u)
+    {
+        // Remap the raw UV into the normal-atlas rect of this submesh.
+        // The normal atlas uses the same UV space as diffuse.
+        // NOTE(review): unlike SampleAtlas this path applies no border clamp, and the
+        // frac()-wrapped UV is what perturb_normal_from_grad hands to cotangent_frame.
+        float4 nrmAtlasRect = Input.vNormalAtlasRect;
+        float2 rawUV = Input.vNormalAtlas3D.xy;
+        float slice = Input.vNormalAtlas3D.z;
+        float2 scale = nrmAtlasRect.zw - nrmAtlasRect.xy;
+        float2 gradX = ddx(rawUV) * scale;
+        float2 gradY = ddy(rawUV) * scale;
+        float2 atlasUV = nrmAtlasRect.xy + frac(rawUV) * scale;
+
+        nrm = perturb_normal_from_grad(
+            Input.vNormalVS,
+            Input.vViewPosition,
+            TX_AtlasNormal,
+            float3(atlasUV, slice),
+            gradX, gradY,
+            SS_Linear,
+            MI_NormalmapStrength);
+    }
+    else
+    {
+        nrm = normalize(Input.vNormalVS);
+    }
+
+    // --- FX map --- (defaults to 1 so the specular terms below stay unscaled without one)
+    float4 fx = 1.0f;
+    if (Input.vFlags & 2u)
+    {
+        fx = SampleAtlas(TX_AtlasFx, SS_Linear, Input.vFxAtlas3D, Input.vFxAtlasRect);
+    }
+
+    output.vDiffuse = float4(color.rgb, Input.vDiffuse.y);
+
+    output.vNrm.xyz = nrm;
+    output.vNrm.w = 1.0f;
+
+    output.vSI_SP.x = MI_SpecularIntensity * fx.r;
+    output.vSI_SP.y = MI_SpecularPower * fx.g;
+
+    output.vVelocity = CalculateVelocity(Input.vCurrClipPos, Input.vPrevClipPos);
+
+    return output;
+}
diff --git a/D3D11Engine/Shaders/Toolbox.h b/D3D11Engine/Shaders/Toolbox.h
index 5a22a510..40dd8f00 100644
--- a/D3D11Engine/Shaders/Toolbox.h
+++ b/D3D11Engine/Shaders/Toolbox.h
@@ -40,4 +40,16 @@ float3 perturb_normal( float3 N, float3 V, Texture2D normalmap, float2 texcoord,
     float3x3 TBN = cotangent_frame( N, -V, texcoord );
 
     return normalize( mul(transpose(TBN), nrmmap) );
+}
+
+// Atlas variant: samples from a Texture2DArray using SampleGrad for correct mip selection
+float3 perturb_normal_from_grad( float3 N, float3 V, Texture2DArray normalmap, float3 uvSlice, float2 gradX, float2 gradY, SamplerState samplerState, float normalmapDepth = 1.0f)
+{
+    float3 nrmmap = normalmap.SampleGrad(samplerState, uvSlice, gradX, gradY).xyz * 2 - 1;
+    nrmmap.xy *= -1.0f;
+    nrmmap.xy *= normalmapDepth;
+    nrmmap = normalize(nrmmap);
+
+    float3x3 TBN = cotangent_frame( N, -V, uvSlice.xy );
+    return normalize( mul(transpose(TBN), nrmmap) );
 }
\ No newline at end of file
diff --git 
a/D3D11Engine/Shaders/VS_ExWorldAtlas.hlsl b/D3D11Engine/Shaders/VS_ExWorldAtlas.hlsl
new file mode 100644
index 00000000..3fd6df7f
--- /dev/null
+++ b/D3D11Engine/Shaders/VS_ExWorldAtlas.hlsl
@@ -0,0 +1,98 @@
+//--------------------------------------------------------------------------------------
+// World mesh vertex shader for atlas indirect draw path
+// Reads per-submesh atlas descriptors from a StructuredBuffer.
+// The submesh index comes from the instance ID buffer + StartInstanceLocation.
+//--------------------------------------------------------------------------------------
+
+#include "Globals_VS_ExConstants.h"
+
+cbuffer Matrices_PerFrame : register( b0 )
+{
+    VS_ExConstantBuffer_PerFrame frame;
+};
+
+// Per-submesh atlas placement: one slice + UV rect each for diffuse (d*), normal (n*)
+// and FX (f*), plus the material flags forwarded to the pixel shader.
+struct WorldMeshSubmeshGPUData
+{
+    int diffuseSlice;
+    float dUStart, dVStart, dUEnd, dVEnd;
+    int normalSlice;
+    float nUStart, nVStart, nUEnd, nVEnd;
+    int fxSlice;
+    float fUStart, fVStart, fUEnd, fVEnd;
+    uint flags;
+};
+
+// Indexed by the per-instance submesh index
+StructuredBuffer<WorldMeshSubmeshGPUData> submeshData : register( t1 );
+
+//--------------------------------------------------------------------------------------
+// Input / Output structures
+//--------------------------------------------------------------------------------------
+struct VS_INPUT
+{
+    float3 vPosition : POSITION;
+    float3 vNormal : NORMAL;
+    float2 vTex1 : TEXCOORD0;
+    float2 vTex2 : TEXCOORD1;
+    float4 vDiffuse : DIFFUSE;
+
+    // StartInstanceLocation in the MDI args offsets this so it equals the submesh index
+    uint submeshIdx : INSTANCE_REMAP_INDEX;
+};
+
+struct VS_OUTPUT
+{
+    float3 vTexcoord3D : TEXCOORD0; // (rawU, rawV, diffuseSlice)
+    float2 vTexcoord2 : TEXCOORD1;
+    float4 vDiffuse : TEXCOORD2;
+    float4 vAtlasRect : TEXCOORD3; // diffuse (uStart, vStart, uEnd, vEnd)
+    float3 vNormalVS : TEXCOORD4;
+    float3 vViewPosition : TEXCOORD5;
+    float4 vCurrClipPos : TEXCOORD6;
+    float4 vPrevClipPos : TEXCOORD7;
+    float3 vNormalAtlas3D : TEXCOORD8; // (rawU, rawV, normalSlice)
+    float4 vNormalAtlasRect : TEXCOORD9; // normal (uStart, vStart, uEnd, vEnd)
+    float3 vFxAtlas3D : TEXCOORD10; // (rawU, rawV, fxSlice)
+    nointerpolation uint vFlags : TEXCOORD11; // material flags
+    float4 vFxAtlasRect : TEXCOORD12; // fx (uStart, vStart, uEnd, vEnd)
+    float4 vPosition : SV_POSITION;
+};
+
+//--------------------------------------------------------------------------------------
+// Vertex Shader
+// Transforms the vertex and forwards the raw UVs together with the submesh's
+// atlas slices/rects and flags; the atlas remap itself happens in the pixel shader.
+//--------------------------------------------------------------------------------------
+VS_OUTPUT VSMain( VS_INPUT Input )
+{
+    VS_OUTPUT Output;
+
+    WorldMeshSubmeshGPUData sm = submeshData[Input.submeshIdx];
+
+    // World mesh vertices are already in world space (M_World = Identity)
+    float3 positionWorld = Input.vPosition;
+
+    Output.vPosition = mul( float4(positionWorld, 1), frame.M_ViewProj );
+
+    // Pass raw UVs + slice — PS does frac() and atlas remap per-pixel
+    Output.vTexcoord3D = float3( Input.vTex1, (float)sm.diffuseSlice );
+    Output.vAtlasRect = float4( sm.dUStart, sm.dVStart, sm.dUEnd, sm.dVEnd );
+
+    Output.vTexcoord2 = Input.vTex2;
+    Output.vDiffuse = Input.vDiffuse;
+    Output.vNormalVS = mul( Input.vNormal, (float3x3)frame.M_View );
+    Output.vViewPosition = mul( float4(positionWorld, 1), frame.M_View ).xyz;
+
+    // Normal map atlas coords
+    Output.vNormalAtlas3D = float3( Input.vTex1, (float)sm.normalSlice );
+    Output.vNormalAtlasRect = float4( sm.nUStart, sm.nVStart, sm.nUEnd, sm.nVEnd );
+
+    // FX map atlas coords
+    Output.vFxAtlas3D = float3( Input.vTex1, (float)sm.fxSlice );
+    Output.vFxAtlasRect = float4( sm.fUStart, sm.fVStart, sm.fUEnd, sm.fVEnd );
+
+    Output.vFlags = sm.flags;
+
+    // Motion vectors — the world mesh is static, so the same world position is
+    // projected with the current (unjittered) and the previous view-projection
+    Output.vCurrClipPos = mul( float4(positionWorld, 1.0), frame.M_UnjitteredViewProj );
+    Output.vPrevClipPos = mul( float4(positionWorld, 1.0), frame.M_PrevViewProj );
+
+    return Output;
+}
From 20e30a58bb7e66d33e10c34d6e2cf36e35f6eb69 Mon Sep 17 00:00:00 2001
From: kirides <13602143+kirides@users.noreply.github.com>
Date: Mon, 9 Mar 2026 14:35:55 +0100
Subject: [PATCH 07/42] restore feature level dx10 "support" - shadows broken currently --- D3D11Engine/D3D11StructuredBuffer.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/D3D11Engine/D3D11StructuredBuffer.h b/D3D11Engine/D3D11StructuredBuffer.h index 1d2a61c5..a47dcfd5 100644 --- a/D3D11Engine/D3D11StructuredBuffer.h +++ b/D3D11Engine/D3D11StructuredBuffer.h @@ -28,7 +28,8 @@ class D3D11StructuredBuffer { } else if ( gpuWrite ) { desc.Usage = D3D11_USAGE_DEFAULT; desc.CPUAccessFlags = 0; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE + | (device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0 ? D3D11_BIND_UNORDERED_ACCESS : 0); } else { desc.Usage = D3D11_USAGE_DEFAULT; desc.CPUAccessFlags = 0; From f6f533b23718c3ac104aaf45f9958c664042ca2b Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Mon, 9 Mar 2026 19:45:34 +0100 Subject: [PATCH 08/42] pass output RTV and DSV into lighting pass --- D3D11Engine/D3D11GraphicsEngine.cpp | 7 +++++-- D3D11Engine/D3D11ShadowMap.cpp | 21 ++++++++++++--------- D3D11Engine/D3D11ShadowMap.h | 10 +++++++--- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index a0b26841..ad653973 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -3131,10 +3131,11 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { builder.Read( specularResource ); builder.Write( backBufferHandle ); - pass.m_executeCallback = [this, colorResource, normalsResource, specularResource](const RenderGraph& graph)-> void { + pass.m_executeCallback = [this, colorResource, normalsResource, specularResource, backBufferHandle](const RenderGraph& graph)-> void { auto colorTexture = graph.GetPhysicalTexture(colorResource); auto normalsTexture = graph.GetPhysicalTexture(normalsResource); auto specularTexture = 
graph.GetPhysicalTexture(specularResource); + auto backbuffer = graph.GetPhysicalTexture( backBufferHandle ); if ( Engine::GAPI->GetRendererState().RendererSettings.EnableShadows ) { // Cascades only get rendered if this is enabled. @@ -3147,7 +3148,9 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { *colorTexture, *normalsTexture, *specularTexture, - *GetDepthBufferCopy()); + *GetDepthBufferCopy(), + backbuffer->GetRenderTargetView().Get(), + GetDepthBuffer()->GetDepthStencilView().Get() ); if ( !Engine::GAPI->GetRendererState().RendererSettings.FixViewFrustum ) { m_FrameLights.clear(); } diff --git a/D3D11Engine/D3D11ShadowMap.cpp b/D3D11Engine/D3D11ShadowMap.cpp index 0e1284a3..1fd566d8 100644 --- a/D3D11Engine/D3D11ShadowMap.cpp +++ b/D3D11Engine/D3D11ShadowMap.cpp @@ -916,7 +916,7 @@ XRESULT D3D11ShadowMap::DrawRainShadowmap() { auto graphicsEngine = reinterpret_cast(Engine::GraphicsEngine); auto _ = graphicsEngine->RecordGraphicsEvent( L"DrawRainShadowmap" ); - graphicsEngine->Effects->DrawRainShadowmap(); + return graphicsEngine->Effects->DrawRainShadowmap(); } return XR_SUCCESS; } @@ -925,8 +925,10 @@ XRESULT D3D11ShadowMap::DrawPointlightLights( std::vector& lights, RenderToTextureBuffer& color, RenderToTextureBuffer& normals, - RenderToTextureBuffer& specular, - RenderToTextureBuffer& depthCopy + RenderToTextureBuffer& specular, + RenderToTextureBuffer& depthCopy, + ID3D11RenderTargetView* outputRTV, + ID3D11DepthStencilView* dsv ) { auto& settings = Engine::GAPI->GetRendererState().RendererSettings; @@ -942,7 +944,9 @@ XRESULT D3D11ShadowMap::DrawLighting( RenderToTextureBuffer& color, RenderToTextureBuffer& normals, RenderToTextureBuffer& specular, - RenderToTextureBuffer& depthCopy) { + RenderToTextureBuffer& depthCopy, + ID3D11RenderTargetView* outputRTV, + ID3D11DepthStencilView* dsv) { auto graphicsEngine = reinterpret_cast(Engine::GraphicsEngine); auto& settings = Engine::GAPI->GetRendererState().RendererSettings; @@ -959,7 +963,7 @@ 
XRESULT D3D11ShadowMap::DrawLighting( Engine::GAPI->SetFarPlane(static_cast(settings.SectionDrawRadius) * WORLD_SECTION_SIZE ); - DrawPointlightLights(lights, color, normals, specular, depthCopy); + DrawPointlightLights(lights, color, normals, specular, depthCopy, outputRTV, dsv); m_context->OMSetRenderTargets( 1, graphicsEngine->GetHDRBackBuffer().GetRenderTargetView().GetAddressOf(), nullptr ); @@ -974,10 +978,9 @@ XRESULT D3D11ShadowMap::DrawLighting( srvs[0] = specular.GetShaderResView().Get(); m_context->PSSetShaderResources( 7, 1, srvs ); - DrawWorldLights(); + DrawWorldLights( outputRTV ); - m_context->OMSetRenderTargets( 1, graphicsEngine->GetHDRBackBuffer().GetRenderTargetView().GetAddressOf(), - graphicsEngine->GetDepthBuffer()->GetDepthStencilView().Get() ); + m_context->OMSetRenderTargets( 1, &outputRTV, dsv ); return XR_SUCCESS; } @@ -1090,7 +1093,7 @@ void D3D11ShadowMap::RenderShadowmaps( const RenderShadowmapsParams& params ) { WORLD_SECTION_SIZE ); } -XRESULT D3D11ShadowMap::DrawWorldLights() +XRESULT D3D11ShadowMap::DrawWorldLights(ID3D11RenderTargetView* outputRTV) { auto graphicsEngine = reinterpret_cast(Engine::GraphicsEngine); auto _ = graphicsEngine->RecordGraphicsEvent( L"DrawWorldLights" ); diff --git a/D3D11Engine/D3D11ShadowMap.h b/D3D11Engine/D3D11ShadowMap.h index bc3f8e48..41903ab1 100644 --- a/D3D11Engine/D3D11ShadowMap.h +++ b/D3D11Engine/D3D11ShadowMap.h @@ -137,14 +137,18 @@ class D3D11ShadowMap { XRESULT DrawWorldShadow(); XRESULT DrawRainShadowmap(); XRESULT DrawPointlightLights(std::vector& lights, RenderToTextureBuffer& color, RenderToTextureBuffer& normals, RenderToTextureBuffer - & specular, RenderToTextureBuffer& depthCopy); + & specular, RenderToTextureBuffer& depthCopy, + ID3D11RenderTargetView* outputRTV, + ID3D11DepthStencilView* dsv ); /** Renders the shadowmaps for the sun using parameter struct */ void RenderShadowmaps( const RenderShadowmapsParams& params ); - XRESULT DrawWorldLights(); + XRESULT DrawWorldLights( 
ID3D11RenderTargetView* outputRTV ); XRESULT DrawLighting(std::vector& lights, RenderToTextureBuffer& color, RenderToTextureBuffer& normals, RenderToTextureBuffer - & specular, RenderToTextureBuffer& depthCopy); + & specular, RenderToTextureBuffer& depthCopy, + ID3D11RenderTargetView* outputRTV, + ID3D11DepthStencilView* dsv ); void XM_CALLCONV RenderShadowCube( DirectX::FXMVECTOR position, float range, From e45af34bcadc7809f535e85b23895ac4655cd104 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Mon, 9 Mar 2026 19:45:39 +0100 Subject: [PATCH 09/42] implement PSO --- D3D11Engine/D3D11Engine.vcxproj | 2 + D3D11Engine/D3D11Engine.vcxproj.filters | 7 + D3D11Engine/D3D11PipelineStateObject.cpp | 288 +++++++++++++++++++++++ D3D11Engine/D3D11PipelineStateObject.h | 193 +++++++++++++++ 4 files changed, 490 insertions(+) create mode 100644 D3D11Engine/D3D11PipelineStateObject.cpp create mode 100644 D3D11Engine/D3D11PipelineStateObject.h diff --git a/D3D11Engine/D3D11Engine.vcxproj b/D3D11Engine/D3D11Engine.vcxproj index c8c0ef9a..bc041ad7 100644 --- a/D3D11Engine/D3D11Engine.vcxproj +++ b/D3D11Engine/D3D11Engine.vcxproj @@ -841,6 +841,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -1103,6 +1104,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + diff --git a/D3D11Engine/D3D11Engine.vcxproj.filters b/D3D11Engine/D3D11Engine.vcxproj.filters index 4625169d..acbd0adc 100644 --- a/D3D11Engine/D3D11Engine.vcxproj.filters +++ b/D3D11Engine/D3D11Engine.vcxproj.filters @@ -383,6 +383,10 @@ ZenGin\Classes + + + Engine\D3D11 + Engine\D3D11\PFX\Effects @@ -881,6 +885,9 @@ Engine\D3D11 + + Engine\D3D11 + Engine\D3D11 diff --git a/D3D11Engine/D3D11PipelineStateObject.cpp b/D3D11Engine/D3D11PipelineStateObject.cpp new file mode 100644 index 00000000..77cc45ef --- /dev/null +++ b/D3D11Engine/D3D11PipelineStateObject.cpp @@ -0,0 +1,288 @@ +#include "pch.h" +#include "D3D11PipelineStateObject.h" +#include 
"GothicGraphicsState.h"
+#include "D3D11VShader.h"
+#include "D3D11PShader.h"
+#include "D3D11GShader.h"
+#include "D3D11HDShader.h"
+#include "Toolbox.h"
+
+// ---------------------------------------------------------------------------
+// D3D11PipelineStateObject::Desc
+// ---------------------------------------------------------------------------
+
+/** Starts from the default blend/rasterizer/depth-stencil states and marks
+ *  every render target format as unknown. */
+D3D11PipelineStateObject::Desc::Desc() {
+    BlendState.SetDefault();
+    RasterizerState.SetDefault();
+    DepthStencilState.SetDefault();
+    std::fill( std::begin( RTVFormats ), std::end( RTVFormats ), DXGI_FORMAT_UNKNOWN );
+}
+
+// ---------------------------------------------------------------------------
+// D3D11PipelineStateObject
+// ---------------------------------------------------------------------------
+
+/** Copies the description and computes the combined hash once. */
+D3D11PipelineStateObject::D3D11PipelineStateObject( const Desc& desc )
+    : m_VS( desc.VS )
+    , m_PS( desc.PS )
+    , m_GS( desc.GS )
+    , m_HDS( desc.HDS )
+    , m_BlendState( desc.BlendState )
+    , m_SampleMask( desc.SampleMask )
+    , m_RasterizerState( desc.RasterizerState )
+    , m_DepthStencilState( desc.DepthStencilState )
+    , m_TopologyType( desc.TopologyType )
+    , m_NumRenderTargets( desc.NumRenderTargets )
+    , m_DSVFormat( desc.DSVFormat )
+    , m_SampleDesc( desc.SampleDesc )
+{
+    memcpy( m_RTVFormats, desc.RTVFormats, sizeof( m_RTVFormats ) );
+
+    // Ensure the Gothic state hashes are up to date
+    m_BlendState.SetDirty();
+    m_RasterizerState.SetDirty();
+    m_DepthStencilState.SetDirty();
+
+    ComputeHash();
+}
+
+/** Mixes a pointer value into the seed, one DWORD at a time. */
+static void HashPointer( std::size_t& seed, const void* ptr ) {
+    // Widen to 64 bit first: shifting a 32-bit uintptr_t by 32 is not a valid
+    // shift, and the branch below is still compiled on 32-bit targets.
+    const auto v = static_cast<unsigned long long>( reinterpret_cast<uintptr_t>( ptr ) );
+    Toolbox::hash_combine( seed, static_cast<DWORD>( v ) );
+    if constexpr ( sizeof( uintptr_t ) > sizeof( DWORD ) ) {
+        Toolbox::hash_combine( seed, static_cast<DWORD>( v >> 32 ) );
+    }
+}
+
+/** Folds shaders, fixed-function state, sample mask, topology and target
+ *  formats into m_Hash. */
+void D3D11PipelineStateObject::ComputeHash() {
+    m_Hash = 0;
+
+    // Shader identity: use raw pointer value as a unique id
+    HashPointer( m_Hash, m_VS.get() );
+    HashPointer( m_Hash, m_PS.get() );
+    HashPointer( m_Hash, m_GS.get() );
+    HashPointer( m_Hash, m_HDS.get() );
+
+    // Fixed-function state hashes (already computed by SetDirty)
+    Toolbox::hash_combine( m_Hash, static_cast<DWORD>( m_BlendState.Hash ) );
+    Toolbox::hash_combine( m_Hash, static_cast<DWORD>( m_RasterizerState.Hash ) );
+    Toolbox::hash_combine( m_Hash, static_cast<DWORD>( m_DepthStencilState.Hash ) );
+
+    // Sample mask
+    Toolbox::hash_combine( m_Hash, static_cast<DWORD>(m_SampleMask) );
+
+    // Topology
+    Toolbox::hash_combine( m_Hash, static_cast<DWORD>( m_TopologyType ) );
+
+    // Render target formats
+    Toolbox::hash_combine( m_Hash, static_cast<DWORD>( m_NumRenderTargets ) );
+    for ( UINT i = 0; i < 8; ++i ) {
+        Toolbox::hash_combine( m_Hash, static_cast<DWORD>( m_RTVFormats[i] ) );
+    }
+    Toolbox::hash_combine( m_Hash, static_cast<DWORD>( m_DSVFormat ) );
+
+    // Sample desc
+    Toolbox::hash_combine( m_Hash, static_cast<DWORD>( m_SampleDesc.Count ) );
+    Toolbox::hash_combine( m_Hash, static_cast<DWORD>( m_SampleDesc.Quality ) );
+}
+
+// ---------------------------------------------------------------------------
+// D3D11PipelineStateCache
+// ---------------------------------------------------------------------------
+
+/** Stores the device and context used for all later state creation and binding. */
+void D3D11PipelineStateCache::Init( ID3D11Device1* device, ID3D11DeviceContext1* context ) {
+    m_Device = device;
+    m_Context = context;
+}
+
+/** Binds the given PSO, touching only the stages whose state differs from what
+ *  this cache bound last. State changed on the context behind the cache's back
+ *  is not seen here; Invalidate() resets the tracking. */
+void D3D11PipelineStateCache::SetPipelineState( const D3D11PipelineStateObject& pso ) {
+    // Fast-out: if the same PSO is already fully bound, nothing to do
+    // NOTE(review): this compares hashes, not identities — two different PSOs
+    // with a colliding hash would be treated as the same one.
+    if ( pso.GetHash() == m_BoundState.PSOHash )
+        return;
+
+    // --- Vertex Shader -------------------------------------------------------
+    const size_t vsHash = reinterpret_cast<size_t>( pso.GetVS().get() );
+    if ( vsHash != m_BoundState.VSHash ) {
+        if ( pso.GetVS() ) {
+            pso.GetVS()->Apply();
+        } else {
+            m_Context->VSSetShader( nullptr, nullptr, 0 );
+        }
+        m_BoundState.VSHash = vsHash;
+    }
+
+    // --- Pixel Shader --------------------------------------------------------
+    const size_t psHash = reinterpret_cast<size_t>( pso.GetPS().get() );
+    if ( psHash != m_BoundState.PSHash ) {
+        if ( pso.GetPS() ) {
+            pso.GetPS()->Apply();
+        } else {
+            m_Context->PSSetShader( nullptr, nullptr, 0 );
+        }
+        m_BoundState.PSHash = psHash;
+    }
+
+    // --- Geometry Shader -----------------------------------------------------
+    const size_t gsHash = reinterpret_cast<size_t>( pso.GetGS().get() );
+    if ( gsHash != m_BoundState.GSHash ) {
+        if ( pso.GetGS() ) {
+            pso.GetGS()->Apply();
+        } else {
+            m_Context->GSSetShader( nullptr, nullptr, 0 );
+        }
+        m_BoundState.GSHash = gsHash;
+    }
+
+    // --- Hull / Domain Shader ------------------------------------------------
+    const size_t hdsHash = reinterpret_cast<size_t>( pso.GetHDS().get() );
+    if ( hdsHash != m_BoundState.HDSHash ) {
+        if ( pso.GetHDS() ) {
+            pso.GetHDS()->Apply();
+        } else {
+            m_Context->HSSetShader( nullptr, nullptr, 0 );
+            m_Context->DSSetShader( nullptr, nullptr, 0 );
+        }
+        m_BoundState.HDSHash = hdsHash;
+    }
+
+    // --- Blend State ---------------------------------------------------------
+    // The sample mask is part of the same OMSetBlendState call, so a change of
+    // either one needs a rebind. GetOrCreateBlendState looks the state up first
+    // and only creates it on a miss; this way the recorded sample mask can never
+    // get ahead of what was actually bound.
+    const size_t blendHash = pso.GetBlendState().Hash;
+    if ( blendHash != m_BoundState.BlendHash || pso.GetSampleMask() != m_BoundState.SampleMask ) {
+        auto blendState = GetOrCreateBlendState( pso.GetBlendState() );
+        const float blendFactor[4] = { 0, 0, 0, 0 };
+        m_Context->OMSetBlendState( blendState.Get(), blendFactor, pso.GetSampleMask() );
+        m_BoundState.BlendHash = blendHash;
+        m_BoundState.SampleMask = pso.GetSampleMask();
+    }
+
+    // --- Rasterizer State ----------------------------------------------------
+    const size_t rastHash = pso.GetRasterizerState().Hash;
+    if ( rastHash != m_BoundState.RasterizerHash ) {
+        auto rastState = GetOrCreateRasterizerState( pso.GetRasterizerState() );
+        m_Context->RSSetState( rastState.Get() );
+        m_BoundState.RasterizerHash = rastHash;
+    }
+
+    // --- Depth-Stencil State -------------------------------------------------
+    const size_t dsHash = pso.GetDepthStencilState().Hash;
+    if ( dsHash != m_BoundState.DepthStencilHash ) {
+        auto dsState = GetOrCreateDepthStencilState( pso.GetDepthStencilState() );
+        m_Context->OMSetDepthStencilState( dsState.Get(), 0 );
+        m_BoundState.DepthStencilHash = dsHash;
+    }
+
+    // --- Primitive Topology --------------------------------------------------
+    const D3D11_PRIMITIVE_TOPOLOGY topology = pso.GetD3D11Topology();
+    if ( topology != m_BoundState.Topology ) {
+        m_Context->IASetPrimitiveTopology( topology );
+        m_BoundState.Topology = topology;
+    }
+
+    // Mark whole PSO as bound
+    m_BoundState.PSOHash = pso.GetHash();
+}
+
+/** Forgets what is currently bound, so the next SetPipelineState starts from
+ *  a default-constructed tracking state. */
+void D3D11PipelineStateCache::Invalidate() {
+    m_BoundState = BoundState{};
+}
+
+/** Invalidates the bound-state tracking and drops all cached state objects. */
+void D3D11PipelineStateCache::Clear() {
+    Invalidate();
+    m_BlendStates.clear();
+    m_RasterizerStates.clear();
+    m_DepthStencilStates.clear();
+}
+
+// ---------------------------------------------------------------------------
+// State object creation helpers
+// ---------------------------------------------------------------------------
+
+Microsoft::WRL::ComPtr
+D3D11PipelineStateCache::GetOrCreateBlendState( const GothicBlendStateInfo& desc ) {
+    auto it = m_BlendStates.find( desc.Hash );
+    if ( it != m_BlendStates.end() )
+        return it->second;
+
+    D3D11_BLEND_DESC bd = {};
+    bd.AlphaToCoverageEnable = desc.AlphaToCoverage;
+    bd.IndependentBlendEnable = FALSE;
+
+    bd.RenderTarget[0].BlendEnable = desc.BlendEnabled;
+    bd.RenderTarget[0].SrcBlend = static_cast( desc.SrcBlend );
+    bd.RenderTarget[0].DestBlend = static_cast( desc.DestBlend );
+    bd.RenderTarget[0].BlendOp = static_cast( desc.BlendOp );
+    bd.RenderTarget[0].SrcBlendAlpha = static_cast( desc.SrcBlendAlpha );
+    bd.RenderTarget[0].DestBlendAlpha = static_cast( desc.DestBlendAlpha );
+    bd.RenderTarget[0].BlendOpAlpha = 
static_cast( desc.BlendOpAlpha ); + bd.RenderTarget[0].RenderTargetWriteMask = desc.ColorWritesEnabled + ? ( D3D11_COLOR_WRITE_ENABLE_RED | D3D11_COLOR_WRITE_ENABLE_GREEN | + D3D11_COLOR_WRITE_ENABLE_BLUE | D3D11_COLOR_WRITE_ENABLE_ALPHA ) + : 0; + + Microsoft::WRL::ComPtr state; + m_Device->CreateBlendState( &bd, state.GetAddressOf() ); + m_BlendStates[desc.Hash] = state; + return state; +} + +Microsoft::WRL::ComPtr +D3D11PipelineStateCache::GetOrCreateRasterizerState( const GothicRasterizerStateInfo& desc ) { + auto it = m_RasterizerStates.find( desc.Hash ); + if ( it != m_RasterizerStates.end() ) + return it->second; + + D3D11_RASTERIZER_DESC rd = {}; + rd.CullMode = static_cast( desc.CullMode ); + rd.FillMode = desc.Wireframe ? D3D11_FILL_WIREFRAME : D3D11_FILL_SOLID; + rd.FrontCounterClockwise = desc.FrontCounterClockwise; + rd.DepthBias = desc.ZBias; + rd.DepthBiasClamp = 0; + rd.SlopeScaledDepthBias = 0; + rd.DepthClipEnable = desc.DepthClipEnable; + rd.ScissorEnable = false; + rd.MultisampleEnable = false; + rd.AntialiasedLineEnable = true; + + Microsoft::WRL::ComPtr state; + m_Device->CreateRasterizerState( &rd, state.GetAddressOf() ); + m_RasterizerStates[desc.Hash] = state; + return state; +} + +Microsoft::WRL::ComPtr +D3D11PipelineStateCache::GetOrCreateDepthStencilState( const GothicDepthBufferStateInfo& desc ) { + auto it = m_DepthStencilStates.find( desc.Hash ); + if ( it != m_DepthStencilStates.end() ) + return it->second; + + D3D11_DEPTH_STENCIL_DESC dd = {}; + dd.DepthEnable = desc.DepthBufferEnabled; + dd.DepthWriteMask = desc.DepthWriteEnabled ? 
D3D11_DEPTH_WRITE_MASK_ALL + : D3D11_DEPTH_WRITE_MASK_ZERO; + dd.DepthFunc = static_cast( desc.DepthBufferCompareFunc ); + + dd.StencilEnable = false; + dd.StencilReadMask = 0xFF; + dd.StencilWriteMask = 0xFF; + dd.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + dd.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_INCR; + dd.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + dd.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + dd.BackFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + dd.BackFace.StencilDepthFailOp = D3D11_STENCIL_OP_DECR; + dd.BackFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + dd.BackFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + + Microsoft::WRL::ComPtr state; + m_Device->CreateDepthStencilState( &dd, state.GetAddressOf() ); + m_DepthStencilStates[desc.Hash] = state; + return state; +} diff --git a/D3D11Engine/D3D11PipelineStateObject.h b/D3D11Engine/D3D11PipelineStateObject.h new file mode 100644 index 00000000..51408ac0 --- /dev/null +++ b/D3D11Engine/D3D11PipelineStateObject.h @@ -0,0 +1,193 @@ +#pragma once +#include "pch.h" +#include +#include +#include "GothicGraphicsState.h" + +class D3D11VShader; +class D3D11PShader; +class D3D11GShader; +class D3D11HDShader; + +struct GothicBlendStateInfo; +struct GothicRasterizerStateInfo; +struct GothicDepthBufferStateInfo; + +// Mirrors D3D12_PRIMITIVE_TOPOLOGY_TYPE +enum class PrimitiveTopologyType : uint8_t { + Undefined = 0, + Point = 1, + Line = 2, + Triangle = 3, + Patch = 4 +}; + +/** Converts PrimitiveTopologyType to the most common D3D11 topology for that type */ +inline D3D11_PRIMITIVE_TOPOLOGY ToD3D11Topology( PrimitiveTopologyType type ) { + switch ( type ) { + case PrimitiveTopologyType::Point: return D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; + case PrimitiveTopologyType::Line: return D3D11_PRIMITIVE_TOPOLOGY_LINELIST; + case PrimitiveTopologyType::Triangle: return D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + case PrimitiveTopologyType::Patch: return D3D11_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST; + 
default: return D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; + } +} + +/** + * Immutable pipeline state object, modeled after D3D12_GRAPHICS_PIPELINE_STATE_DESC. + * + * Captures the full set of static pipeline configuration that in DX12 would be + * baked into a single ID3D12PipelineState: + * - Shader stages (VS, PS, GS, Hull/Domain) + * - Blend, Rasterizer, DepthStencil states + * - Primitive topology type + * - Sample mask / sample desc + * - Render-target and depth-stencil formats + * + * Once constructed the object is immutable. A hash is computed at creation + * time so that the PipelineStateCache can quickly detect redundant state sets. + */ +class D3D11PipelineStateObject { +public: + /** Descriptor used to build a PSO – fill this in then pass to the constructor. */ + struct Desc { + // --- Shader stages (nullable) ---------------------------------------- + std::shared_ptr VS; + std::shared_ptr PS; + std::shared_ptr GS; + std::shared_ptr HDS; // Hull + Domain (combined, matching existing codebase) + + // --- Fixed-function state -------------------------------------------- + GothicBlendStateInfo BlendState; + UINT SampleMask = 0xFFFFFFFF; + GothicRasterizerStateInfo RasterizerState; + GothicDepthBufferStateInfo DepthStencilState; + + // --- Input assembly -------------------------------------------------- + PrimitiveTopologyType TopologyType = PrimitiveTopologyType::Triangle; + + // --- Render target description (for future DX12) --------------------- + UINT NumRenderTargets = 1; + DXGI_FORMAT RTVFormats[8] = {}; + DXGI_FORMAT DSVFormat = DXGI_FORMAT_D32_FLOAT; + DXGI_SAMPLE_DESC SampleDesc = { 1, 0 }; + + Desc(); + }; + + explicit D3D11PipelineStateObject( const Desc& desc ); + + // --- Accessors (const, PSO is immutable) --------------------------------- + + size_t GetHash() const { return m_Hash; } + bool operator==( const D3D11PipelineStateObject& o ) const { return m_Hash == o.m_Hash; } + bool operator!=( const D3D11PipelineStateObject& o ) const { return 
m_Hash != o.m_Hash; } + + const std::shared_ptr& GetVS() const { return m_VS; } + const std::shared_ptr& GetPS() const { return m_PS; } + const std::shared_ptr& GetGS() const { return m_GS; } + const std::shared_ptr& GetHDS() const { return m_HDS; } + + const GothicBlendStateInfo& GetBlendState() const { return m_BlendState; } + const GothicRasterizerStateInfo& GetRasterizerState() const { return m_RasterizerState; } + const GothicDepthBufferStateInfo& GetDepthStencilState() const { return m_DepthStencilState; } + + UINT GetSampleMask() const { return m_SampleMask; } + PrimitiveTopologyType GetTopologyType() const { return m_TopologyType; } + D3D11_PRIMITIVE_TOPOLOGY GetD3D11Topology() const { return ToD3D11Topology( m_TopologyType ); } + + UINT GetNumRenderTargets() const { return m_NumRenderTargets; } + DXGI_FORMAT GetRTVFormat( UINT i ) const { return (i < 8) ? m_RTVFormats[i] : DXGI_FORMAT_UNKNOWN; } + DXGI_FORMAT GetDSVFormat() const { return m_DSVFormat; } + const DXGI_SAMPLE_DESC& GetSampleDesc() const { return m_SampleDesc; } + +private: + void ComputeHash(); + + // Shaders + std::shared_ptr m_VS; + std::shared_ptr m_PS; + std::shared_ptr m_GS; + std::shared_ptr m_HDS; + + // Fixed-function state (stored by value – small POD structs) + GothicBlendStateInfo m_BlendState; + UINT m_SampleMask; + GothicRasterizerStateInfo m_RasterizerState; + GothicDepthBufferStateInfo m_DepthStencilState; + + // Input assembly + PrimitiveTopologyType m_TopologyType; + + // Render target description + UINT m_NumRenderTargets; + DXGI_FORMAT m_RTVFormats[8]; + DXGI_FORMAT m_DSVFormat; + DXGI_SAMPLE_DESC m_SampleDesc; + + // Combined hash of the entire PSO + size_t m_Hash = 0; +}; + +/** + * Pipeline-state cache that tracks which D3D11 states are currently bound and + * performs the minimal set of API calls when switching to a new PSO. 
+ * + * Usage: + * cache.SetPipelineState(myPSO); // binds everything that changed + * + * Internally caches the D3D11 blend / rasterizer / depth-stencil state COM + * objects so they are created at most once per unique configuration. + */ +class D3D11PipelineStateCache { +public: + D3D11PipelineStateCache() = default; + + /** Initialise with the D3D11 device and immediate context. */ + void Init( ID3D11Device1* device, ID3D11DeviceContext1* context ); + + /** + * Apply a pipeline state object. Only the state that differs from the + * currently bound state will be set on the device context. + */ + void SetPipelineState( const D3D11PipelineStateObject& pso ); + + /** + * Reset all tracked state to its defaults, so the next SetPipelineState + * re-binds every state that differs from those defaults. NOTE(review): a null shader or an Undefined topology equals the default and is therefore NOT re-applied. Call this when external code (e.g. the Gothic + * engine) may have changed D3D11 state behind the cache's back. + */ + void Invalidate(); + + /** Release all cached D3D11 state objects. */ + void Clear(); + +private: + // --- Cached D3D11 state objects (keyed by Gothic state hash) ------------- + Microsoft::WRL::ComPtr GetOrCreateBlendState( const GothicBlendStateInfo& desc ); + Microsoft::WRL::ComPtr GetOrCreateRasterizerState( const GothicRasterizerStateInfo& desc ); + Microsoft::WRL::ComPtr GetOrCreateDepthStencilState( const GothicDepthBufferStateInfo& desc ); + + ID3D11Device1* m_Device = nullptr; + ID3D11DeviceContext1* m_Context = nullptr; + + // --- Currently bound state (tracked to skip redundant API calls) --------- + struct BoundState { + size_t PSOHash = 0; + size_t VSHash = 0; + size_t PSHash = 0; + size_t GSHash = 0; + size_t HDSHash = 0; + size_t BlendHash = 0; + size_t RasterizerHash = 0; + size_t DepthStencilHash = 0; + UINT SampleMask = 0xFFFFFFFF; + D3D11_PRIMITIVE_TOPOLOGY Topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; + }; + BoundState m_BoundState{}; + + // --- State object caches (one D3D11 object per unique hash) -------------- + std::unordered_map> m_BlendStates; +
std::unordered_map> m_RasterizerStates; + std::unordered_map> m_DepthStencilStates; +}; From f7b9f3e1a3e901cceb41e7345cbd7e0c30830de9 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Mon, 9 Mar 2026 20:30:50 +0100 Subject: [PATCH 10/42] reduce interface level --- D3D11Engine/D3D11CascadedShadowMapBuffer.cpp | 4 ++-- D3D11Engine/D3D11CascadedShadowMapBuffer.h | 4 ++-- D3D11Engine/D3D11ShadowAtlas.cpp | 4 ++-- D3D11Engine/D3D11ShadowAtlas.h | 4 ++-- D3D11Engine/D3D11ShadowMap.cpp | 6 +++--- D3D11Engine/D3D11ShadowMap.h | 6 +++--- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/D3D11Engine/D3D11CascadedShadowMapBuffer.cpp b/D3D11Engine/D3D11CascadedShadowMapBuffer.cpp index 0139b414..ee7e9ccf 100644 --- a/D3D11Engine/D3D11CascadedShadowMapBuffer.cpp +++ b/D3D11Engine/D3D11CascadedShadowMapBuffer.cpp @@ -115,13 +115,13 @@ ID3D11ShaderResourceView* D3D11CascadedShadowMapBuffer::GetShaderResourceView() return m_srv.Get(); } -void D3D11CascadedShadowMapBuffer::BindToPixelShader( ID3D11DeviceContext1* context, UINT slot ) const { +void D3D11CascadedShadowMapBuffer::BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) const { if ( m_srv ) { context->PSSetShaderResources( slot, 1, m_srv.GetAddressOf() ); } } -void D3D11CascadedShadowMapBuffer::BindToVertexShader( ID3D11DeviceContext1* context, UINT slot ) const { +void D3D11CascadedShadowMapBuffer::BindToVertexShader( ID3D11DeviceContext* context, UINT slot ) const { if ( m_srv ) { context->VSSetShaderResources( slot, 1, m_srv.GetAddressOf() ); } diff --git a/D3D11Engine/D3D11CascadedShadowMapBuffer.h b/D3D11Engine/D3D11CascadedShadowMapBuffer.h index 694c205b..260c53e6 100644 --- a/D3D11Engine/D3D11CascadedShadowMapBuffer.h +++ b/D3D11Engine/D3D11CascadedShadowMapBuffer.h @@ -50,14 +50,14 @@ class D3D11CascadedShadowMapBuffer { * @param context Device context * @param slot Shader resource slot */ - void BindToPixelShader( ID3D11DeviceContext1* context, UINT 
slot ) const; + void BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) const; /** * Bind the texture array to a vertex shader slot. * @param context Device context * @param slot Shader resource slot */ - void BindToVertexShader( ID3D11DeviceContext1* context, UINT slot ) const; + void BindToVertexShader( ID3D11DeviceContext* context, UINT slot ) const; /** Get the size of each cascade (width = height) */ UINT GetSize() const { return m_size; } diff --git a/D3D11Engine/D3D11ShadowAtlas.cpp b/D3D11Engine/D3D11ShadowAtlas.cpp index c4bd7984..a24370a9 100644 --- a/D3D11Engine/D3D11ShadowAtlas.cpp +++ b/D3D11Engine/D3D11ShadowAtlas.cpp @@ -184,13 +184,13 @@ ID3D11ShaderResourceView* D3D11ShadowAtlas::GetShaderResourceView() const { return m_srv.Get(); } -void D3D11ShadowAtlas::BindToPixelShader( ID3D11DeviceContext1* context, UINT slot ) const { +void D3D11ShadowAtlas::BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) const { if ( m_srv ) { context->PSSetShaderResources( slot, 1, m_srv.GetAddressOf() ); } } -void D3D11ShadowAtlas::BindToVertexShader( ID3D11DeviceContext1* context, UINT slot ) const { +void D3D11ShadowAtlas::BindToVertexShader( ID3D11DeviceContext* context, UINT slot ) const { if ( m_srv ) { context->VSSetShaderResources( slot, 1, m_srv.GetAddressOf() ); } diff --git a/D3D11Engine/D3D11ShadowAtlas.h b/D3D11Engine/D3D11ShadowAtlas.h index e4ee63bc..ba385be7 100644 --- a/D3D11Engine/D3D11ShadowAtlas.h +++ b/D3D11Engine/D3D11ShadowAtlas.h @@ -68,8 +68,8 @@ class D3D11ShadowAtlas { /** Get the SRV for the atlas texture (Texture2D). 
*/ ID3D11ShaderResourceView* GetShaderResourceView() const; - void BindToPixelShader( ID3D11DeviceContext1* context, UINT slot ) const; - void BindToVertexShader( ID3D11DeviceContext1* context, UINT slot ) const; + void BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) const; + void BindToVertexShader( ID3D11DeviceContext* context, UINT slot ) const; /** Get cascade 0 pixel size (the largest cascade). */ UINT GetCascade0Size() const { return m_cascade0Size; } diff --git a/D3D11Engine/D3D11ShadowMap.cpp b/D3D11Engine/D3D11ShadowMap.cpp index 1fd566d8..d2bcea99 100644 --- a/D3D11Engine/D3D11ShadowMap.cpp +++ b/D3D11Engine/D3D11ShadowMap.cpp @@ -381,7 +381,7 @@ void D3D11ShadowMap::Resize( int size ) { m_lastNumCascades = static_cast( atlasNumCascades ); } -void D3D11ShadowMap::BindToPixelShader( ID3D11DeviceContext1* context, UINT slot ) { +void D3D11ShadowMap::BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) { if ( m_useAtlas ) { if ( m_shadowAtlas ) m_shadowAtlas->BindToPixelShader( context, slot ); } else { @@ -389,11 +389,11 @@ void D3D11ShadowMap::BindToPixelShader( ID3D11DeviceContext1* context, UINT slot } } -void D3D11ShadowMap::BindSampler( ID3D11DeviceContext1* context, UINT slot ) { +void D3D11ShadowMap::BindSampler( ID3D11DeviceContext* context, UINT slot ) { if ( m_shadowmapSampler ) context->PSSetSamplers( slot, 1, m_shadowmapSampler.GetAddressOf() ); } -void D3D11ShadowMap::BindSamplerToCS( ID3D11DeviceContext1* context, UINT slot ) { +void D3D11ShadowMap::BindSamplerToCS( ID3D11DeviceContext* context, UINT slot ) { if ( m_shadowmapSampler ) context->CSSetSamplers( slot, 1, m_shadowmapSampler.GetAddressOf() ); } diff --git a/D3D11Engine/D3D11ShadowMap.h b/D3D11Engine/D3D11ShadowMap.h index 41903ab1..a8cef69e 100644 --- a/D3D11Engine/D3D11ShadowMap.h +++ b/D3D11Engine/D3D11ShadowMap.h @@ -119,11 +119,11 @@ class D3D11ShadowMap { } // Bind world shadowmap SRV to a pixel shader slot (binds entire cascade array) - void 
BindToPixelShader( ID3D11DeviceContext1* context, UINT slot ); + void BindToPixelShader( ID3D11DeviceContext* context, UINT slot ); // Bind the shadowmap sampler to the given slot - void BindSampler( ID3D11DeviceContext1* context, UINT slot ); - void BindSamplerToCS( ID3D11DeviceContext1* context, UINT slot ); + void BindSampler( ID3D11DeviceContext* context, UINT slot ); + void BindSamplerToCS( ID3D11DeviceContext* context, UINT slot ); XRESULT PrepareRender(); From 47cecd6088a711bb616ef70dfa28722f273edaf3 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Mon, 9 Mar 2026 20:33:17 +0100 Subject: [PATCH 11/42] Init PipelinestateCache --- D3D11Engine/D3D11GraphicsEngine.cpp | 3 +++ D3D11Engine/D3D11GraphicsEngine.h | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index ad653973..8b47aafe 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -787,6 +787,9 @@ XRESULT D3D11GraphicsEngine::Init() { D3D11VertexBuffer::EBindFlags::B_INDEXBUFFER, D3D11VertexBuffer::EUsageFlags::U_IMMUTABLE ); + // Initialize pipeline state cache + m_PipelineStateCache.Init( Device.Get(), Context.Get() ); + // Create shadow map manager ShadowMaps = std::make_unique(); int initialShadowSize = Engine::GAPI->GetRendererState().RendererSettings.ShadowMapSize; diff --git a/D3D11Engine/D3D11GraphicsEngine.h b/D3D11Engine/D3D11GraphicsEngine.h index 0e216570..02510676 100644 --- a/D3D11Engine/D3D11GraphicsEngine.h +++ b/D3D11Engine/D3D11GraphicsEngine.h @@ -4,6 +4,7 @@ #include "GothicAPI.h" #include "D3D11ShadowMap.h" #include "D3D11ShaderManager.h" +#include "D3D11PipelineStateObject.h" #include "D3D11TextureAtlasManager.h" #include "D3D11StructuredBuffer.h" #include "D3D11IndirectBuffer.h" @@ -379,6 +380,9 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { D3D11PfxRenderer* GetPfxRenderer() const { return 
PfxRenderer.get(); } D3D11Texture* GetDistortionTexture() const { return DistortionTexture.get(); } + /** Returns the pipeline state cache for optimal D3D11 state management */ + D3D11PipelineStateCache& GetPipelineStateCache() { return m_PipelineStateCache; } + RenderToTextureBuffer* GetVelocityBuffer() const { return VelocityBuffer.get(); } const XMFLOAT4X4& GetPrevViewProjMatrix() const { return m_PrevViewProjMatrix; } @@ -406,6 +410,9 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { void CacheWorldStaticVobs(); + /** Pipeline state cache for minimizing redundant D3D11 state transitions */ + D3D11PipelineStateCache m_PipelineStateCache; + std::unique_ptr m_FrameLimiter; int m_LastFrameLimit; From 37131cd262e17d78a45089111d41965139621d14 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Mon, 9 Mar 2026 20:33:25 +0100 Subject: [PATCH 12/42] begin to abstract DeviceContext --- D3D11Engine/D3D11CommandList.cpp | 11 ++ D3D11Engine/D3D11CommandList.h | 140 ++++++++++++++++++++++++ D3D11Engine/D3D11Engine.vcxproj | 2 + D3D11Engine/D3D11Engine.vcxproj.filters | 6 + 4 files changed, 159 insertions(+) create mode 100644 D3D11Engine/D3D11CommandList.cpp create mode 100644 D3D11Engine/D3D11CommandList.h diff --git a/D3D11Engine/D3D11CommandList.cpp b/D3D11Engine/D3D11CommandList.cpp new file mode 100644 index 00000000..9fd631c0 --- /dev/null +++ b/D3D11Engine/D3D11CommandList.cpp @@ -0,0 +1,11 @@ +#include "pch.h" +#include "D3D11CommandList.h" +#include "D3D11VertexBuffer.h" + +void D3D11CommandList::IASetVertexBuffer( D3D11VertexBuffer* vb, UINT stride, UINT offset ) { + m_Context->IASetVertexBuffers( 0, 1, vb->GetVertexBuffer().GetAddressOf(), &stride, &offset ); +} + +void D3D11CommandList::IASetIndexBuffer( D3D11VertexBuffer* ib, DXGI_FORMAT format, UINT offset ) { + m_Context->IASetIndexBuffer( ib->GetVertexBuffer().Get(), format, offset ); +} diff --git a/D3D11Engine/D3D11CommandList.h 
b/D3D11Engine/D3D11CommandList.h new file mode 100644 index 00000000..41e4ee24 --- /dev/null +++ b/D3D11Engine/D3D11CommandList.h @@ -0,0 +1,140 @@ +#pragma once +#include "pch.h" +#include "D3D11PipelineStateObject.h" + +class D3D11VertexBuffer; + +/** + * Slim command-list wrapper around an ID3D11DeviceContext and the + * D3D11PipelineStateCache. + * + * Provides SetPipelineState() plus the commonly used Draw / IA / OM + * helpers so that call-sites read like a modern graphics API without + * touching the raw context or the engine's global render-state machine. + * + * The object is intentionally cheap to construct (two pointers and a counter) and + * does not own any resources. + */ +struct D3D11CommandList { + + D3D11CommandList() = default; + D3D11CommandList( ID3D11DeviceContext* context, D3D11PipelineStateCache* cache ) + : m_Context( context ), m_Cache( cache ) {} + + // --- Pipeline state ------------------------------------------------------ + + void SetPipelineState( const D3D11PipelineStateObject& pso ) { + m_Cache->SetPipelineState( pso ); + } + + /** Force the cache to re-bind everything on next SetPipelineState.
*/ + void InvalidatePipelineState() { + m_Cache->Invalidate(); + } + + // --- Input assembly ------------------------------------------------------ + + void IASetVertexBuffer( D3D11VertexBuffer* vb, UINT stride, UINT offset = 0 ); + + void IASetVertexBuffers( UINT startSlot, + UINT numBuffers, + ID3D11Buffer* const* buffers, + const UINT* strides, + const UINT* offsets ) { + m_Context->IASetVertexBuffers( startSlot, numBuffers, buffers, strides, offsets ); + } + + void IASetIndexBuffer( ID3D11Buffer* buffer, DXGI_FORMAT format, UINT offset = 0 ) { + m_Context->IASetIndexBuffer( buffer, format, offset ); + } + + void IASetIndexBuffer( D3D11VertexBuffer* ib, DXGI_FORMAT format, UINT offset = 0 ); + + // --- Draw calls ---------------------------------------------------------- + + void Draw( UINT vertexCount, UINT startVertexLocation = 0 ) { + m_Context->Draw( vertexCount, startVertexLocation ); + m_DrawnTriangles += vertexCount / 3; + } + + void DrawIndexed( UINT indexCount, + UINT startIndexLocation = 0, + INT baseVertexLocation = 0 ) { + m_Context->DrawIndexed( indexCount, startIndexLocation, baseVertexLocation ); + m_DrawnTriangles += indexCount / 3; + } + + void DrawInstanced( UINT vertexCountPerInstance, + UINT instanceCount, + UINT startVertexLocation = 0, + UINT startInstanceLocation = 0 ) { + m_Context->DrawInstanced( vertexCountPerInstance, instanceCount, + startVertexLocation, startInstanceLocation ); + m_DrawnTriangles += ( vertexCountPerInstance / 3 ) * instanceCount; + } + + void DrawIndexedInstanced( UINT indexCountPerInstance, + UINT instanceCount, + UINT startIndexLocation = 0, + INT baseVertexLocation = 0, + UINT startInstanceLocation = 0 ) { + m_Context->DrawIndexedInstanced( indexCountPerInstance, instanceCount, + startIndexLocation, baseVertexLocation, + startInstanceLocation ); + m_DrawnTriangles += ( indexCountPerInstance / 3 ) * instanceCount; + } + + void DrawIndexedInstancedIndirect( ID3D11Buffer* argsBuffer, + UINT alignedByteOffsetForArgs ) 
{ + m_Context->DrawIndexedInstancedIndirect( argsBuffer, alignedByteOffsetForArgs ); + // Triangle count unknown for indirect draws + } + + // --- Render target / viewport helpers ------------------------------------ + + void OMSetRenderTargets( UINT numViews, + ID3D11RenderTargetView* const* rtvs, + ID3D11DepthStencilView* dsv ) { + m_Context->OMSetRenderTargets( numViews, rtvs, dsv ); + } + + void RSSetViewports( UINT numViewports, const D3D11_VIEWPORT* viewports ) { + m_Context->RSSetViewports( numViewports, viewports ); + } + + void RSGetViewports( UINT* numViewports, D3D11_VIEWPORT* viewports ) { + m_Context->RSGetViewports( numViewports, viewports ); + } + + void ClearDepthStencilView( ID3D11DepthStencilView* dsv, + UINT clearFlags, + float depth, + UINT8 stencil ) { + m_Context->ClearDepthStencilView( dsv, clearFlags, depth, stencil ); + } + + void ClearRenderTargetView( ID3D11RenderTargetView* rtv, const float color[4] ) { + m_Context->ClearRenderTargetView( rtv, color ); + } + + // --- Stats --------------------------------------------------------------- + + /** Return triangles drawn since the last FlushDrawnTriangles() call and reset the counter.
*/ + UINT FlushDrawnTriangles() { + UINT t = m_DrawnTriangles; + m_DrawnTriangles = 0; + return t; + } + + UINT GetDrawnTriangles() const { return m_DrawnTriangles; } + + // --- Raw access (escape hatch) ------------------------------------------- + + ID3D11DeviceContext* GetContext() const { return m_Context; } + D3D11PipelineStateCache* GetPSOCache() const { return m_Cache; } + +private: + ID3D11DeviceContext* m_Context = nullptr; + D3D11PipelineStateCache* m_Cache = nullptr; + UINT m_DrawnTriangles = 0; +}; diff --git a/D3D11Engine/D3D11Engine.vcxproj b/D3D11Engine/D3D11Engine.vcxproj index bc041ad7..e64aa5bc 100644 --- a/D3D11Engine/D3D11Engine.vcxproj +++ b/D3D11Engine/D3D11Engine.vcxproj @@ -842,6 +842,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -1105,6 +1106,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + diff --git a/D3D11Engine/D3D11Engine.vcxproj.filters b/D3D11Engine/D3D11Engine.vcxproj.filters index acbd0adc..5e150944 100644 --- a/D3D11Engine/D3D11Engine.vcxproj.filters +++ b/D3D11Engine/D3D11Engine.vcxproj.filters @@ -387,6 +387,9 @@ Engine\D3D11 + + Engine\D3D11 + Engine\D3D11\PFX\Effects @@ -888,6 +891,9 @@ Engine\D3D11 + + Engine\D3D11 + Engine\D3D11 From cdfd4c5d8db86374024cdc86f2886ffbc2f3bc7c Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Mon, 9 Mar 2026 21:07:43 +0100 Subject: [PATCH 13/42] remove "SetDIrty", instead hash the 3 states on "UpdateRendererStates" --- D3D11Engine/D3D11Effect.cpp | 6 -- D3D11Engine/D3D11GraphicsEngine.cpp | 87 ++++-------------------- D3D11Engine/D3D11GraphicsEngine.h | 5 ++ D3D11Engine/D3D11GraphicsEngineBase.cpp | 5 -- D3D11Engine/D3D11LineRenderer.cpp | 2 - D3D11Engine/D3D11PFX_DistanceBlur.cpp | 1 - D3D11Engine/D3D11PFX_GodRays.cpp | 1 - D3D11Engine/D3D11PFX_HDR.cpp | 1 - D3D11Engine/D3D11PFX_HeightFog.cpp | 2 - D3D11Engine/D3D11PFX_SMAA.cpp | 13 +++- D3D11Engine/D3D11PipelineStateObject.cpp | 11 +-- 
D3D11Engine/D3D11ShadowMap.cpp | 6 -- D3D11Engine/D3D7/MyDirect3DDevice7.h | 19 ++---- D3D11Engine/EditorLinePrimitive.cpp | 1 - D3D11Engine/GVegetationBox.cpp | 3 - D3D11Engine/GothicAPI.cpp | 7 -- D3D11Engine/GothicGraphicsState.h | 28 +++----- D3D11Engine/SMAA/D3D11SMAA.cpp | 28 ++------ D3D11Engine/SMAA/D3D11SMAA.h | 3 - 19 files changed, 56 insertions(+), 173 deletions(-) diff --git a/D3D11Engine/D3D11Effect.cpp b/D3D11Engine/D3D11Effect.cpp index 3bfecfe3..fd0d6c16 100644 --- a/D3D11Engine/D3D11Effect.cpp +++ b/D3D11Engine/D3D11Effect.cpp @@ -219,15 +219,12 @@ XRESULT D3D11Effect::DrawRain() { // Set alphablending state.BlendState.SetAlphaBlending(); - state.BlendState.SetDirty(); // Disable depth-write state.DepthState.DepthWriteEnabled = false; - state.DepthState.SetDirty(); // Disable culling state.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - state.RasterizerState.SetDirty(); // Rendering instances only e->GetContext()->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP ); @@ -376,15 +373,12 @@ XRESULT D3D11Effect::DrawRain_CS() { // Set alphablending state.BlendState.SetAlphaBlending(); - state.BlendState.SetDirty(); // Disable depth-write state.DepthState.DepthWriteEnabled = false; - state.DepthState.SetDirty(); // Disable culling state.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - state.RasterizerState.SetDirty(); // Rendering instances only e->GetContext()->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP ); diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 8b47aafe..2685d423 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -1349,7 +1349,6 @@ XRESULT D3D11GraphicsEngine::OnBeginFrame() { // Disable culling for ui rendering(Sprite from LeGo needs it since it use CCW instead of CW order) SetDefaultStates(); rendererState.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - 
rendererState.RasterizerState.SetDirty(); UpdateRenderStates(); GetContext()->PSSetSamplers( 0, 1, ClampSamplerState.GetAddressOf() ); @@ -1906,10 +1905,8 @@ XRESULT D3D11GraphicsEngine::DrawScreenFade( void* c ) { // Default states SetDefaultStates(); Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::CF_COMPARISON_ALWAYS; Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); SetActivePixelShader( PShaderID::PS_PFX_CinemaScope ); ActivePS->Apply(); @@ -1953,28 +1950,23 @@ XRESULT D3D11GraphicsEngine::DrawScreenFade( void* c ) { case zRND_ALPHA_FUNC_BLEND_TEST: case zRND_ALPHA_FUNC_SUB: { Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); break; } case zRND_ALPHA_FUNC_ADD: { Engine::GAPI->GetRendererState().BlendState.SetAdditiveBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); break; } case zRND_ALPHA_FUNC_MUL: { Engine::GAPI->GetRendererState().BlendState.SetModulateBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); break; } case zRND_ALPHA_FUNC_MUL2: { Engine::GAPI->GetRendererState().BlendState.SetModulate2Blending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); break; } } Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::CF_COMPARISON_ALWAYS; Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); if ( haveTexture ) SetActivePixelShader( PShaderID::PS_PFX_Alpha_Blend ); @@ -2001,7 +1993,6 @@ XRESULT D3D11GraphicsEngine::DrawScreenFade( void* c ) { // Disable culling for ui rendering(Sprite from LeGo needs it since it use CCW instead of CW order) SetDefaultStates(); 
Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); UpdateRenderStates(); } return XR_SUCCESS; @@ -2853,8 +2844,8 @@ XRESULT D3D11GraphicsEngine::UnbindTexture( int slot ) { /** Recreates the renderstates */ XRESULT D3D11GraphicsEngine::UpdateRenderStates() { - if ( Engine::GAPI->GetRendererState().BlendState.StateDirty && - Engine::GAPI->GetRendererState().BlendState.Hash != FFBlendStateHash ) { + Engine::GAPI->GetRendererState().BlendState.ComputeHash(); + if ( Engine::GAPI->GetRendererState().BlendState.Hash != FFBlendStateHash ) { D3D11BlendStateInfo* state = static_cast (GothicStateCache::s_BlendStateMap[Engine::GAPI->GetRendererState().BlendState]); @@ -2869,13 +2860,12 @@ XRESULT D3D11GraphicsEngine::UpdateRenderStates() { FFBlendState = state->State.Get(); FFBlendStateHash = Engine::GAPI->GetRendererState().BlendState.Hash; - Engine::GAPI->GetRendererState().BlendState.StateDirty = false; GetContext()->OMSetBlendState( FFBlendState.Get(), float4( 0, 0, 0, 0 ).toPtr(), 0xFFFFFFFF ); } - if ( Engine::GAPI->GetRendererState().RasterizerState.StateDirty && - Engine::GAPI->GetRendererState().RasterizerState.Hash != + Engine::GAPI->GetRendererState().RasterizerState.ComputeHash(); + if ( Engine::GAPI->GetRendererState().RasterizerState.Hash != FFRasterizerStateHash ) { D3D11RasterizerStateInfo* state = static_cast (GothicStateCache::s_RasterizerStateMap[Engine::GAPI->GetRendererState().RasterizerState]); @@ -2891,12 +2881,11 @@ XRESULT D3D11GraphicsEngine::UpdateRenderStates() { FFRasterizerState = state->State.Get(); FFRasterizerStateHash = Engine::GAPI->GetRendererState().RasterizerState.Hash; - Engine::GAPI->GetRendererState().RasterizerState.StateDirty = false; GetContext()->RSSetState( FFRasterizerState.Get() ); } - if ( Engine::GAPI->GetRendererState().DepthState.StateDirty && - Engine::GAPI->GetRendererState().DepthState.Hash != + 
Engine::GAPI->GetRendererState().DepthState.ComputeHash(); + if ( Engine::GAPI->GetRendererState().DepthState.Hash != FFDepthStencilStateHash ) { D3D11DepthBufferState* state = static_cast (GothicStateCache::s_DepthBufferMap[Engine::GAPI->GetRendererState().DepthState]); @@ -2912,7 +2901,6 @@ XRESULT D3D11GraphicsEngine::UpdateRenderStates() { FFDepthStencilState = state->State.Get(); FFDepthStencilStateHash = Engine::GAPI->GetRendererState().DepthState.Hash; - Engine::GAPI->GetRendererState().DepthState.StateDirty = false; GetContext()->OMSetDepthStencilState( FFDepthStencilState.Get(), 0 ); } @@ -3017,7 +3005,6 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { 0, 0, 0 ).toPtr() ); rendererState.RasterizerState.FrontCounterClockwise = false; - rendererState.RasterizerState.SetDirty(); RGResourceHandle colorResource; graph.AddPass( L"Initialize Buffers", [&]( RGBuilder& builder, RenderPass& pass ) { @@ -3694,7 +3681,6 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { // Disable culling for ui rendering(Sprite from LeGo needs it since it use CCW instead of CW order) SetDefaultStates(); rendererState.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - rendererState.RasterizerState.SetDirty(); UpdateRenderStates(); GetContext()->PSSetSamplers( 0, 1, ClampSamplerState.GetAddressOf() ); @@ -3818,7 +3804,6 @@ XRESULT D3D11GraphicsEngine::DrawMeshInfoListAlphablended( // Setup renderstates Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); Engine::GAPI->SetViewTransformXM( view ); @@ -3884,10 +3869,8 @@ XRESULT D3D11GraphicsEngine::DrawMeshInfoListAlphablended( if ( alphaFunc == zMAT_ALPHA_FUNC_ADD ) Engine::GAPI->GetRendererState().BlendState.SetAdditiveBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); 
Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); lastAlphaFunc = alphaFunc; @@ -3909,9 +3892,7 @@ XRESULT D3D11GraphicsEngine::DrawMeshInfoListAlphablended( } Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = true; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = false; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); UpdateRenderStates(); @@ -4416,7 +4397,6 @@ void D3D11GraphicsEngine::DrawWaterSurfaces() { // Setup render states for z-prepass Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = false; // Rasterization is faster without writes - Engine::GAPI->GetRendererState().BlendState.SetDirty(); UpdateRenderStates(); // Bind vertex water shader @@ -4447,10 +4427,8 @@ void D3D11GraphicsEngine::DrawWaterSurfaces() { // Disable depth writes after z-prepass Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = true; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; // Rasterization is faster without writes - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); // Bind pixel water shader @@ -4514,11 +4492,9 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAround( cullFront ? 
GothicRasterizerStateInfo::CM_CULL_FRONT : GothicRasterizerStateInfo::CM_CULL_NONE; Engine::GAPI->GetRendererState().RasterizerState.DepthClipEnable = true; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.SetDefault(); Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::ECompareFunc::CF_COMPARISON_LESS_EQUAL; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); Context->PSSetShaderResources( 0, 6, s_nullSRVs ); @@ -4829,11 +4805,9 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAround_Layered( cullFront ? GothicRasterizerStateInfo::CM_CULL_FRONT : GothicRasterizerStateInfo::CM_CULL_NONE; Engine::GAPI->GetRendererState().RasterizerState.DepthClipEnable = true; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.SetDefault(); Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::ECompareFunc::CF_COMPARISON_LESS_EQUAL; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); Context->PSSetShaderResources( 0, 6, s_nullSRVs ); @@ -5350,11 +5324,9 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p GothicRasterizerStateInfo::CM_CULL_NONE; Engine::GAPI->GetRendererState().RasterizerState.DepthClipEnable = true; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.SetDefault(); Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::ECompareFunc::CF_COMPARISON_LESS_EQUAL; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); @@ -5821,7 +5793,6 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p } Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = true; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); } /** Update morph mesh visual */ @@ 
-6511,10 +6482,8 @@ XRESULT D3D11GraphicsEngine::DrawFrameAlphaMeshes() else if ( blendBlend ) Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); } @@ -6570,7 +6539,6 @@ XRESULT D3D11GraphicsEngine::DrawPolyStrips( bool noTextures ) { // Setup renderstates Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); Engine::GAPI->SetViewTransformXM( view ); @@ -6638,10 +6606,7 @@ XRESULT D3D11GraphicsEngine::DrawPolyStrips( bool noTextures ) { else if ( blendBlend ) Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); - Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); } @@ -6672,10 +6637,6 @@ void D3D11GraphicsEngine::SetDefaultStates( bool force ) { Engine::GAPI->GetRendererState().BlendState.SetDefault(); Engine::GAPI->GetRendererState().DepthState.SetDefault(); - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); - Engine::GAPI->GetRendererState().DepthState.SetDirty(); - if ( force ) { FFRasterizerStateHash = 0; FFBlendStateHash = 0; @@ -6684,6 +6645,12 @@ void D3D11GraphicsEngine::SetDefaultStates( bool force ) { } } +void D3D11GraphicsEngine::InvalidateStateCache() { + FFRasterizerStateHash = 0; + FFBlendStateHash = 0; + FFDepthStencilStateHash = 0; +} + /** Draws the sky using the GSky-Object */ XRESULT D3D11GraphicsEngine::DrawSky() { GSky* sky = Engine::GAPI->GetSky(); @@ -6691,7 +6658,6 @@ XRESULT D3D11GraphicsEngine::DrawSky() { if ( 
!Engine::GAPI->GetRendererState().RendererSettings.AtmosphericScattering ) { Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); #if defined(BUILD_GOTHIC_1_08k) && !defined(BUILD_1_12F) @@ -6759,11 +6725,8 @@ XRESULT D3D11GraphicsEngine::DrawSky() { Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; Engine::GAPI->GetRendererState().RasterizerState.SetDefault(); - Engine::GAPI->GetRendererState().DepthState.SetDirty(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_BACK; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); SetupVS_ExMeshDrawCall(); SetupVS_ExConstantBuffer(); @@ -6785,7 +6748,6 @@ XRESULT D3D11GraphicsEngine::DrawSky() { { SetDefaultStates(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); // Draw barrier after sky @@ -6909,7 +6871,6 @@ void D3D11GraphicsEngine::DrawVobSingle( VobInfo* vob, zCCamera& camera ) { // Set backface culling Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_BACK; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); GetContext()->PSSetSamplers( 0, 1, DefaultSamplerState.GetAddressOf() ); SetActivePixelShader( PShaderID::PS_Preview_Textured ); @@ -6944,7 +6905,6 @@ void D3D11GraphicsEngine::DrawVobSingle( VobInfo* vob, zCCamera& camera ) { // Disable culling again Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); GetContext()->PSSetSamplers( 0, 1, ClampSamplerState.GetAddressOf() ); } @@ -7189,7 +7149,6 @@ void D3D11GraphicsEngine::DrawDecalList( const std::vector& decals, auto _ = RecordGraphicsEvent(L"DrawDecalList"); 
Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); Engine::GAPI->SetViewTransformXM( view ); // Update view transform @@ -7198,7 +7157,6 @@ void D3D11GraphicsEngine::DrawDecalList( const std::vector& decals, if ( !lighting ) { SetActivePixelShader( PShaderID::PS_Transparency ); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); } else { SetActivePixelShader( PShaderID::PS_World ); } @@ -7272,7 +7230,6 @@ void D3D11GraphicsEngine::DrawDecalList( const std::vector& decals, } if ( lastAlphaFunc != alphaFunc ) { - Engine::GAPI->GetRendererState().BlendState.SetDirty(); UpdateRenderStates(); lastAlphaFunc = alphaFunc; } @@ -7346,7 +7303,6 @@ void D3D11GraphicsEngine::DrawQuadMarks() { Engine::GAPI->SetViewTransformXM( view ); // Update view transform Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); ActivePS->GetBuffer( "FFPipelineConstantBuffer" ) .Update( &Engine::GAPI->GetRendererState().GraphicsState ) @@ -7400,7 +7356,6 @@ void D3D11GraphicsEngine::DrawQuadMarks() { alphaFunc = mat->GetAlphaFunc(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); UpdateRenderStates(); } @@ -7426,9 +7381,7 @@ void D3D11GraphicsEngine::DrawMQuadMarks() { Engine::GAPI->SetViewTransformXM( view ); // Update view transform Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); SetupVS_ExMeshDrawCall(); SetupVS_ExConstantBuffer(); @@ -7458,7 +7411,6 @@ void D3D11GraphicsEngine::DrawMQuadMarks() { 
alphaFunc = mat->GetAlphaFunc(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); UpdateRenderStates(); } @@ -7530,7 +7482,6 @@ void D3D11GraphicsEngine::DrawFrameParticleMeshes( std::unordered_mapGetRendererState(); state.DepthState.DepthWriteEnabled = false; - state.DepthState.SetDirty(); XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); Engine::GAPI->SetViewTransformXM( view ); @@ -7569,19 +7520,15 @@ void D3D11GraphicsEngine::DrawFrameParticleMeshes( std::unordered_mapGetRendererState(); state.BlendState.SetAdditiveBlending(); - state.BlendState.SetDirty(); state.DepthState.DepthWriteEnabled = false; - state.DepthState.SetDirty(); state.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - state.RasterizerState.SetDirty(); std::vector*>> pvecAdd; std::vector*>> pvecRest; @@ -7743,7 +7687,6 @@ void D3D11GraphicsEngine::DrawFrameParticles( if ( partInfo.BlendMode != lastBlendMode ) { // Setup blend state state.BlendState = blendState; - state.BlendState.SetDirty(); lastBlendMode = partInfo.BlendMode; UpdateRenderStates(); @@ -7757,7 +7700,6 @@ void D3D11GraphicsEngine::DrawFrameParticles( Context->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); state.BlendState.SetDefault(); - state.BlendState.SetDirty(); bufferParticleColor->BindToPixelShader( Context.Get(), 1 ); bufferParticleDistortion->BindToPixelShader( Context.Get(), 2 ); @@ -7804,17 +7746,14 @@ void D3D11GraphicsEngine::UpdateOcclusion() { // Set up states Engine::GAPI->GetRendererState().RasterizerState.SetDefault(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().BlendState.SetDefault(); Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = false; // Rasterization is faster without writes - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.SetDefault(); 
Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; // Don't write the bsp-nodes to the depth buffer, also quicker - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); @@ -8036,7 +7975,6 @@ void D3D11GraphicsEngine::DrawString( const std::string& str, float x, float y, Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::CF_COMPARISON_ALWAYS; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); @@ -8092,7 +8030,6 @@ void D3D11GraphicsEngine::DrawString( const std::string& str, float x, float y, DrawVertexBuffer( TempVertexBuffer.get(), vertices.size(), sizeof( ExVertexStruct ) ); oldDepthState.ApplyTo( Engine::GAPI->GetRendererState().DepthState ); - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); diff --git a/D3D11Engine/D3D11GraphicsEngine.h b/D3D11Engine/D3D11GraphicsEngine.h index 02510676..de2e7cd1 100644 --- a/D3D11Engine/D3D11GraphicsEngine.h +++ b/D3D11Engine/D3D11GraphicsEngine.h @@ -215,6 +215,11 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { /** Sets up the default rendering state */ void SetDefaultStates( bool force = false ); + /** Invalidates the cached FF state hashes, forcing the next UpdateRenderStates() + * to re-apply all states to D3D11. Call after any code that sets D3D11 states + * directly (e.g. ImGui, external libraries). 
*/ + void InvalidateStateCache(); + /** Returns the current resolution (Maybe supersampled)*/ INT2 GetResolution() override { return m_scaledResolution; }; diff --git a/D3D11Engine/D3D11GraphicsEngineBase.cpp b/D3D11Engine/D3D11GraphicsEngineBase.cpp index 2beb8819..dca9838c 100644 --- a/D3D11Engine/D3D11GraphicsEngineBase.cpp +++ b/D3D11Engine/D3D11GraphicsEngineBase.cpp @@ -167,11 +167,6 @@ void D3D11GraphicsEngineBase::SetDefaultStates() { Engine::GAPI->GetRendererState().DepthState.SetDefault(); Engine::GAPI->GetRendererState().SamplerState.SetDefault(); - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); - Engine::GAPI->GetRendererState().DepthState.SetDirty(); - Engine::GAPI->GetRendererState().SamplerState.SetDirty(); - GetContext()->PSSetSamplers( 0, 1, DefaultSamplerState.GetAddressOf() ); UpdateRenderStates(); diff --git a/D3D11Engine/D3D11LineRenderer.cpp b/D3D11Engine/D3D11LineRenderer.cpp index d9c4ec9f..6008bcea 100644 --- a/D3D11Engine/D3D11LineRenderer.cpp +++ b/D3D11Engine/D3D11LineRenderer.cpp @@ -65,7 +65,6 @@ XRESULT D3D11LineRenderer::Flush() { engine->SetDefaultStates(); Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); engine->SetupVS_ExMeshDrawCall(); engine->SetupVS_ExConstantBuffer(); @@ -112,7 +111,6 @@ XRESULT D3D11LineRenderer::FlushScreenSpace() { engine->SetDefaultStates(); Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); engine->SetupVS_ExMeshDrawCall(); engine->GetContext()->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_LINELIST ); diff --git a/D3D11Engine/D3D11PFX_DistanceBlur.cpp b/D3D11Engine/D3D11PFX_DistanceBlur.cpp index 8cd5beb5..505477e8 100644 --- a/D3D11Engine/D3D11PFX_DistanceBlur.cpp +++ b/D3D11Engine/D3D11PFX_DistanceBlur.cpp @@ -28,7 +28,6 @@ XRESULT D3D11PFX_DistanceBlur::Render( ID3D11ShaderResourceView* 
diffuse ) { auto ps = engine->GetShaderManager().GetPShader( PShaderID::PS_PFX_DistanceBlur ); Engine::GAPI->GetRendererState().BlendState.SetDefault(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); // Copy scene auto tempBuffer = FxRenderer->GetTempBuffer(); diff --git a/D3D11Engine/D3D11PFX_GodRays.cpp b/D3D11Engine/D3D11PFX_GodRays.cpp index b75456ca..87319373 100644 --- a/D3D11Engine/D3D11PFX_GodRays.cpp +++ b/D3D11Engine/D3D11PFX_GodRays.cpp @@ -102,7 +102,6 @@ XRESULT D3D11PFX_GodRays::Render( // Upscale and blend Engine::GAPI->GetRendererState().BlendState.SetAdditiveBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); FxRenderer->CopyTextureToRTV( tempBuffer2->GetShaderResView(), oldRTV, engine->GetResolution() ); diff --git a/D3D11Engine/D3D11PFX_HDR.cpp b/D3D11Engine/D3D11PFX_HDR.cpp index 4fe911c1..d0ae7b7f 100644 --- a/D3D11Engine/D3D11PFX_HDR.cpp +++ b/D3D11Engine/D3D11PFX_HDR.cpp @@ -41,7 +41,6 @@ XRESULT D3D11PFX_HDR::Render( ID3D11RenderTargetView* output, ID3D11ShaderResour D3D11GraphicsEngine* engine = reinterpret_cast(Engine::GraphicsEngine); engine->SetDefaultStates(); Engine::GAPI->GetRendererState().BlendState.BlendEnabled = false; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); engine->UpdateRenderStates(); // Save old rendertargets diff --git a/D3D11Engine/D3D11PFX_HeightFog.cpp b/D3D11Engine/D3D11PFX_HeightFog.cpp index db270e57..7614ec44 100644 --- a/D3D11Engine/D3D11PFX_HeightFog.cpp +++ b/D3D11Engine/D3D11PFX_HeightFog.cpp @@ -120,11 +120,9 @@ XRESULT D3D11PFX_HeightFog::Render( RenderToTextureBuffer* fxbuffer ) { engine->SetDefaultStates(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().BlendState.SetDefault(); //Engine::GAPI->GetRendererState().BlendState.SetAdditiveBlending(); Engine::GAPI->GetRendererState().BlendState.BlendEnabled = true; - 
Engine::GAPI->GetRendererState().BlendState.SetDirty(); // Copy FxRenderer->DrawFullScreenQuad(); diff --git a/D3D11Engine/D3D11PFX_SMAA.cpp b/D3D11Engine/D3D11PFX_SMAA.cpp index 0b2217b7..14ed5525 100644 --- a/D3D11Engine/D3D11PFX_SMAA.cpp +++ b/D3D11Engine/D3D11PFX_SMAA.cpp @@ -30,7 +30,18 @@ void D3D11PFX_SMAA::RenderPostFX( const Microsoft::WRL::ComPtr(Engine::GraphicsEngine); ID3D11DeviceContext* pContext = engine->GetContext().Get(); - engine->SetDefaultStates(); + // Configure states that SMAA needs through the Gothic state system + auto& state = Engine::GAPI->GetRendererState(); + state.RasterizerState.SetDefault(); + state.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; + state.RasterizerState.DepthClipEnable = true; + + state.DepthState.DepthBufferEnabled = false; + state.DepthState.DepthWriteEnabled = false; + state.DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::CF_COMPARISON_ALWAYS; + + state.BlendState.SetDefault(); + engine->UpdateRenderStates(); Microsoft::WRL::ComPtr OldRTV; diff --git a/D3D11Engine/D3D11PipelineStateObject.cpp b/D3D11Engine/D3D11PipelineStateObject.cpp index 77cc45ef..cd157a26 100644 --- a/D3D11Engine/D3D11PipelineStateObject.cpp +++ b/D3D11Engine/D3D11PipelineStateObject.cpp @@ -39,9 +39,9 @@ D3D11PipelineStateObject::D3D11PipelineStateObject( const Desc& desc ) memcpy( m_RTVFormats, desc.RTVFormats, sizeof( m_RTVFormats ) ); // Ensure the Gothic state hashes are up to date - m_BlendState.SetDirty(); - m_RasterizerState.SetDirty(); - m_DepthStencilState.SetDirty(); + m_BlendState.ComputeHash(); + m_RasterizerState.ComputeHash(); + m_DepthStencilState.ComputeHash(); ComputeHash(); } @@ -63,9 +63,12 @@ void D3D11PipelineStateObject::ComputeHash() { HashPointer( m_Hash, m_GS.get() ); HashPointer( m_Hash, m_HDS.get() ); - // Fixed-function state hashes (already computed by SetDirty) + // Fixed-function state hashes + m_BlendState.ComputeHash(); Toolbox::hash_combine( m_Hash, static_cast( 
m_BlendState.Hash ) ); + m_RasterizerState.ComputeHash(); Toolbox::hash_combine( m_Hash, static_cast( m_RasterizerState.Hash ) ); + m_DepthStencilState.ComputeHash(); Toolbox::hash_combine( m_Hash, static_cast( m_DepthStencilState.Hash ) ); // Sample mask diff --git a/D3D11Engine/D3D11ShadowMap.cpp b/D3D11Engine/D3D11ShadowMap.cpp index d2bcea99..dc083471 100644 --- a/D3D11Engine/D3D11ShadowMap.cpp +++ b/D3D11Engine/D3D11ShadowMap.cpp @@ -1044,7 +1044,6 @@ void D3D11ShadowMap::RenderShadowmaps( const RenderShadowmapsParams& params ) { m_context->OMSetRenderTargets( 1, params.DebugRTV.GetAddressOf(), dsvOverwrite.Get() ); Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = true; } - Engine::GAPI->GetRendererState().BlendState.SetDirty(); // Dont render shadows from the sun when it isn't on the sky if ( isNotWorldShadowMap || @@ -1100,13 +1099,10 @@ XRESULT D3D11ShadowMap::DrawWorldLights(ID3D11RenderTargetView* outputRTV) auto& settings = Engine::GAPI->GetRendererState().RendererSettings; Engine::GAPI->GetRendererState().BlendState.SetAdditiveBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::CF_COMPARISON_ALWAYS; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); // Modify light when raining float rain = Engine::GAPI->GetRainFXWeight(); @@ -1296,12 +1292,10 @@ void XM_CALLCONV D3D11ShadowMap::RenderShadowCube( Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = true; // Should be false, but needs to be true for SV_Depth to work - Engine::GAPI->GetRendererState().BlendState.SetDirty(); } else { m_context->OMSetRenderTargets( 1, debugRTV.GetAddressOf(), face.Get() ); Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = true; - 
Engine::GAPI->GetRendererState().BlendState.SetDirty(); } // Always render shadowcube when dynamic shadows are enabled diff --git a/D3D11Engine/D3D7/MyDirect3DDevice7.h b/D3D11Engine/D3D7/MyDirect3DDevice7.h index 874cab0b..ba75e5eb 100644 --- a/D3D11Engine/D3D7/MyDirect3DDevice7.h +++ b/D3D11Engine/D3D7/MyDirect3DDevice7.h @@ -246,15 +246,15 @@ class MyDirect3DDevice7 : public IDirect3DDevice7 { } break; - case D3DRENDERSTATE_ZENABLE: state.DepthState.DepthBufferEnabled = Value != 0; state.DepthState.SetDirty(); break; + case D3DRENDERSTATE_ZENABLE: state.DepthState.DepthBufferEnabled = Value != 0; break; case D3DRENDERSTATE_ALPHATESTENABLE: state.GraphicsState.SetGraphicsSwitch( GSWITCH_ALPHAREF, Value != 0 ); break; - case D3DRENDERSTATE_SRCBLEND: state.BlendState.SrcBlend = static_cast(Value); state.BlendState.SetDirty(); break; - case D3DRENDERSTATE_DESTBLEND: state.BlendState.DestBlend = static_cast(Value); state.BlendState.SetDirty(); break; - //case D3DRENDERSTATE_CULLMODE: state.RasterizerState.CullMode = static_cast(Value); state.RasterizerState.SetDirty(); break; - case D3DRENDERSTATE_ZFUNC: state.DepthState.DepthBufferCompareFunc = static_cast(Value); state.DepthState.SetDirty(); break; + case D3DRENDERSTATE_SRCBLEND: state.BlendState.SrcBlend = static_cast(Value); break; + case D3DRENDERSTATE_DESTBLEND: state.BlendState.DestBlend = static_cast(Value); break; + //case D3DRENDERSTATE_CULLMODE: state.RasterizerState.CullMode = static_cast(Value); break; + case D3DRENDERSTATE_ZFUNC: state.DepthState.DepthBufferCompareFunc = static_cast(Value); break; case D3DRENDERSTATE_ALPHAREF: state.GraphicsState.FF_AlphaRef = static_cast(Value) / 255.0f; break; // Ref for masked - case D3DRENDERSTATE_ALPHABLENDENABLE: state.BlendState.BlendEnabled = Value != 0; state.BlendState.SetDirty(); break; - case D3DRENDERSTATE_ZBIAS: state.RasterizerState.ZBias = Value; state.DepthState.SetDirty(); break; + case D3DRENDERSTATE_ALPHABLENDENABLE: state.BlendState.BlendEnabled = 
Value != 0; break; + case D3DRENDERSTATE_ZBIAS: state.RasterizerState.ZBias = Value; break; case D3DRENDERSTATE_TEXTUREFACTOR: state.GraphicsState.FF_TextureFactor = float4( Value ); break; case D3DRENDERSTATE_LIGHTING: state.GraphicsState.SetGraphicsSwitch( GSWITCH_LIGHING, Value != 0 ); break; } @@ -346,15 +346,12 @@ class MyDirect3DDevice7 : public IDirect3DDevice7 { case D3DTSS_ADDRESS: state.SamplerState.AddressU = static_cast(Value); state.SamplerState.AddressV = static_cast(Value); - state.SamplerState.SetDirty(); break; case D3DTSS_ADDRESSU: state.SamplerState.AddressU = static_cast(Value); - state.SamplerState.SetDirty(); break; case D3DTSS_ADDRESSV: state.SamplerState.AddressV = static_cast(Value); - state.SamplerState.SetDirty(); break; case D3DTSS_BORDERCOLOR: break; @@ -509,7 +506,6 @@ class MyDirect3DDevice7 : public IDirect3DDevice7 { // Gothic wants that for the sky Engine::GAPI->GetRendererState().RasterizerState.FrontCounterClockwise = true; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GraphicsEngine->SetActiveVertexShader( VShaderID::VS_TransformedEx ); Engine::GraphicsEngine->BindViewportInformation( VShaderID::VS_TransformedEx, 0 ); break; @@ -573,7 +569,6 @@ class MyDirect3DDevice7 : public IDirect3DDevice7 { // Gothic wants that for the sky Engine::GAPI->GetRendererState().RasterizerState.FrontCounterClockwise = true; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GraphicsEngine->DrawVertexBufferFF( static_cast(lpd3dVertexBuffer)->GetVertexBuffer(), dwNumVertices, dwStartVertex, sizeof( Gothic_XYZRHW_DIF_T1_Vertex ) ); break; diff --git a/D3D11Engine/EditorLinePrimitive.cpp b/D3D11Engine/EditorLinePrimitive.cpp index df0fb872..660327e4 100644 --- a/D3D11Engine/EditorLinePrimitive.cpp +++ b/D3D11Engine/EditorLinePrimitive.cpp @@ -857,7 +857,6 @@ void EditorLinePrimitive::RenderVertexBuffer( const Microsoft::WRL::ComPtrSetDefaultStates(); 
Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); engine->UpdateRenderStates(); Shader->Apply(); diff --git a/D3D11Engine/GVegetationBox.cpp b/D3D11Engine/GVegetationBox.cpp index 8d6eb97b..523d51e5 100644 --- a/D3D11Engine/GVegetationBox.cpp +++ b/D3D11Engine/GVegetationBox.cpp @@ -309,7 +309,6 @@ void GVegetationBox::RenderVegetation( const XMFLOAT3& eye ) { VegetationTexture->BindToPixelShader( 1 ); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); // Enable alpha-to-coverage @@ -317,7 +316,6 @@ void GVegetationBox::RenderVegetation( const XMFLOAT3& eye ) { Engine::GAPI->GetRendererState().BlendState.SetDefault(); Engine::GAPI->GetRendererState().BlendState.BlendEnabled = false; Engine::GAPI->GetRendererState().BlendState.AlphaToCoverage = Engine::GAPI->GetRendererState().RendererSettings.VegetationAlphaToCoverage; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); } Engine::GraphicsEngine->SetActiveVertexShader( VShaderID::VS_GrassInstanced ); @@ -356,7 +354,6 @@ void GVegetationBox::RenderVegetation( const XMFLOAT3& eye ) { if ( Engine::GAPI->GetRendererState().RendererSettings.VegetationAlphaToCoverage ) { Engine::GAPI->GetRendererState().BlendState.SetDefault(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); } } diff --git a/D3D11Engine/GothicAPI.cpp b/D3D11Engine/GothicAPI.cpp index bf6e9769..7fc3c70b 100644 --- a/D3D11Engine/GothicAPI.cpp +++ b/D3D11Engine/GothicAPI.cpp @@ -1267,7 +1267,6 @@ void GothicAPI::DrawWorldMeshNaive() { // Set up frustum for the camera RendererState.RasterizerState.SetDefault(); - RendererState.RasterizerState.SetDirty(); zCCamera::GetCamera()->Activate(); auto drawRadius = RendererState.RendererSettings.SkeletalMeshDrawRadius; @@ -2879,11 +2878,8 @@ void GothicAPI::DrawTransparencyVobs() { if ( !TransparencyVobs.empty() ) { // 
Setup alpha blending RendererState.RasterizerState.SetDefault(); - RendererState.RasterizerState.SetDirty(); RendererState.BlendState.SetAlphaBlending(); - RendererState.BlendState.SetDirty(); RendererState.DepthState.SetDefault(); - RendererState.DepthState.SetDirty(); } auto psBufGAI = g->GetShaderManager().GetPShader( PShaderID::PS_Transparency )->GetBuffer( "GhostAlphaInfo" ); @@ -2968,11 +2964,8 @@ void GothicAPI::DrawSkeletalVN() { SkeletalVobInfo* vi = VNSkeletalVobs.back(); RendererState.RasterizerState.SetDefault(); - RendererState.RasterizerState.SetDirty(); RendererState.BlendState.SetAlphaBlending(); - RendererState.BlendState.SetDirty(); RendererState.DepthState.SetDefault(); - RendererState.DepthState.SetDirty(); D3D11GraphicsEngine* g = reinterpret_cast(Engine::GraphicsEngine); diff --git a/D3D11Engine/GothicGraphicsState.h b/D3D11Engine/GothicGraphicsState.h index 07234fe0..0dd4b3ec 100644 --- a/D3D11Engine/GothicGraphicsState.h +++ b/D3D11Engine/GothicGraphicsState.h @@ -117,18 +117,13 @@ struct GothicGraphicsState { }; __declspec(align(4)) struct GothicPipelineState { - /** Sets this state dirty, which means that it will be updated before next rendering */ - void SetDirty() { - StateDirty = true; - HashThis( reinterpret_cast(this), StructSize ); - } - - /** Hashes the whole struct */ - void HashThis( char* data, int size ) { + /** Recomputes the hash from current state data. Called automatically by UpdateRenderStates(). 
*/ + void ComputeHash() { Hash = 0; - // Start hashing at the data of the other structs, skip the data of this one - for ( int i = sizeof( GothicPipelineState ); i < size; i += 4 ) { + // Hash the derived struct data, skipping the base GothicPipelineState fields + char* data = reinterpret_cast(this); + for ( int i = sizeof( GothicPipelineState ); i < StructSize; i += 4 ) { DWORD d; memcpy( &d, data + i, 4 ); @@ -140,7 +135,6 @@ __declspec(align(4)) struct GothicPipelineState { return Hash == o.Hash; } - bool StateDirty; size_t Hash; int StructSize; }; @@ -220,7 +214,6 @@ struct GothicDepthBufferStateInfo : public GothicPipelineState { c.DepthWriteEnabled = DepthWriteEnabled; c.DepthBufferCompareFunc = DepthBufferCompareFunc; - c.StateDirty = StateDirty; c.Hash = Hash; c.StructSize = StructSize; return c; @@ -232,7 +225,6 @@ struct GothicDepthBufferStateInfo : public GothicPipelineState { c.DepthBufferCompareFunc = DepthBufferCompareFunc; c.StructSize = StructSize; - c.SetDirty(); } }; @@ -371,7 +363,6 @@ struct GothicBlendStateInfo : public GothicPipelineState { c.AlphaToCoverage = AlphaToCoverage; c.ColorWritesEnabled = ColorWritesEnabled; - c.StateDirty = StateDirty; c.Hash = Hash; c.StructSize = StructSize; return c; @@ -389,7 +380,6 @@ struct GothicBlendStateInfo : public GothicPipelineState { c.ColorWritesEnabled = ColorWritesEnabled; c.StructSize = StructSize; - c.SetDirty(); } }; @@ -1097,10 +1087,10 @@ struct GothicRendererState { TransformState.SetDefault(); RendererSettings.SetDefault(); - DepthState.SetDirty(); - BlendState.SetDirty(); - RasterizerState.SetDirty(); - SamplerState.SetDirty(); + DepthState.ComputeHash(); + BlendState.ComputeHash(); + RasterizerState.ComputeHash(); + SamplerState.ComputeHash(); } GothicDepthBufferStateInfo DepthState; diff --git a/D3D11Engine/SMAA/D3D11SMAA.cpp b/D3D11Engine/SMAA/D3D11SMAA.cpp index 0747a2be..284ed2dc 100644 --- a/D3D11Engine/SMAA/D3D11SMAA.cpp +++ b/D3D11Engine/SMAA/D3D11SMAA.cpp @@ -67,24 +67,8 @@ bool 
D3D11SMAA::Init() sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; // Point filter m_device->CreateSamplerState(&sampDesc, m_samplerPoint.GetAddressOf()); - // 5. Create Helper States - D3D11_RASTERIZER_DESC rasterDesc = {}; - rasterDesc.FillMode = D3D11_FILL_SOLID; - rasterDesc.CullMode = D3D11_CULL_NONE; - rasterDesc.DepthClipEnable = true; - m_device->CreateRasterizerState(&rasterDesc, m_rasterizerState.GetAddressOf()); - - D3D11_DEPTH_STENCIL_DESC dsDesc = {}; - dsDesc.DepthEnable = FALSE; - dsDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; - dsDesc.DepthFunc = D3D11_COMPARISON_ALWAYS; - m_device->CreateDepthStencilState(&dsDesc, m_disableDepthState.GetAddressOf()); - - // Default blend state (Opaque/Overwrite) - D3D11_BLEND_DESC blendDesc = {}; - blendDesc.RenderTarget[0].BlendEnable = FALSE; - blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; - m_device->CreateBlendState(&blendDesc, m_blendState.GetAddressOf()); + // Note: Rasterizer, depth-stencil, and blend states are managed by the caller + // through the Gothic state tracking system. return true; } @@ -131,9 +115,8 @@ void D3D11SMAA::Render(ID3D11ShaderResourceView* inputSRV, // Common State Setup m_context->IASetInputLayout(nullptr); // Using VertexID generation m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - m_context->RSSetState(m_rasterizerState.Get()); - m_context->OMSetDepthStencilState(m_disableDepthState.Get(), 0); - m_context->OMSetBlendState(m_blendState.Get(), nullptr, 0xFFFFFFFF); + // Note: Rasterizer, depth-stencil, and blend states are configured by the caller + // through the Gothic state tracking system (Engine::GAPI->GetRendererState()). 
ID3D11SamplerState* samplers[] = { m_samplerLinear.Get(), m_samplerPoint.Get() }; m_context->PSSetSamplers(0, 2, samplers); @@ -220,7 +203,4 @@ void D3D11SMAA::ReleaseResources() { m_constantBuffer.Reset(); m_samplerLinear.Reset(); m_samplerPoint.Reset(); - m_rasterizerState.Reset(); - m_disableDepthState.Reset(); - m_blendState.Reset(); } diff --git a/D3D11Engine/SMAA/D3D11SMAA.h b/D3D11Engine/SMAA/D3D11SMAA.h index 6beece45..3339b469 100644 --- a/D3D11Engine/SMAA/D3D11SMAA.h +++ b/D3D11Engine/SMAA/D3D11SMAA.h @@ -68,9 +68,6 @@ class D3D11SMAA { Microsoft::WRL::ComPtr m_constantBuffer; Microsoft::WRL::ComPtr m_samplerLinear; Microsoft::WRL::ComPtr m_samplerPoint; - Microsoft::WRL::ComPtr m_rasterizerState; - Microsoft::WRL::ComPtr m_disableDepthState; - Microsoft::WRL::ComPtr m_blendState; // Default (overwrite off) int m_width; int m_height; From 664391a1dfa8337bc5df35aa31722bb3a93723d4 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Thu, 12 Mar 2026 15:48:46 +0100 Subject: [PATCH 14/42] reduce memory fragmentation causes --- D3D11Engine/D3D11GraphicsEngine.cpp | 42 +++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 2685d423..8ae326d2 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -8067,6 +8067,26 @@ void D3D11GraphicsEngine::BuildStaticGeometryBuffers() { // Track which MeshInfo* we've already added (same visual used by many vobs shares geometry) std::unordered_set processedMeshes; + // Pre-count vertices/indices to reserve and avoid incremental reallocation + { + size_t totalVertices = 0, totalIndices = 0; + std::unordered_set counted; + for ( auto const& [proto, visual] : Engine::GAPI->GetStaticMeshVisuals() ) { + for ( auto const& [meshKey, meshList] : visual->MeshesByTexture ) { + if ( m_TextureAtlasLookup.find( meshKey.Texture ) == m_TextureAtlasLookup.end() ) + 
continue; + for ( MeshInfo* mi : meshList ) { + if ( counted.insert( mi ).second ) { + totalVertices += mi->Vertices.size(); + totalIndices += mi->Indices.size(); + } + } + } + } + allVertices.reserve( totalVertices ); + allIndices.reserve( totalIndices ); + } + for ( auto const& [proto, visual] : Engine::GAPI->GetStaticMeshVisuals() ) { for ( auto const& [meshKey, meshList] : visual->MeshesByTexture ) { // Look up atlas descriptor for this texture @@ -8834,6 +8854,28 @@ void D3D11GraphicsEngine::BuildStaticWorldMeshBuffers() { std::unordered_set processedMeshes; + // Pre-count total vertices/indices to avoid incremental reallocation + { + size_t totalVertices = 0, totalIndices = 0, totalSubmeshes = 0; + auto& ws = Engine::GAPI->GetWorldSections(); + for ( auto& [x, row] : ws ) { + for ( auto& [y, section] : row ) { + for ( auto const& [meshKey, worldMeshInfo] : section.WorldMeshes ) { + if ( !meshKey.Material ) continue; + zCTexture* tex = meshKey.Material->GetTextureSingle(); + if ( m_WorldMeshDiffuseAtlasLookup.find( tex ) != m_WorldMeshDiffuseAtlasLookup.end() ) { + totalVertices += worldMeshInfo->Vertices.size(); + totalIndices += worldMeshInfo->Indices.size(); + totalSubmeshes++; + } + } + } + } + allVertices.reserve( totalVertices ); + allIndices.reserve( totalIndices ); + submeshGPU.reserve( totalSubmeshes ); + } + auto& worldSections = Engine::GAPI->GetWorldSections(); for ( auto& [x, row] : worldSections ) { for ( auto& [y, section] : row ) { From e521858124b1627d4054f481663db2dcef6cbfac Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Thu, 12 Mar 2026 16:35:27 +0100 Subject: [PATCH 15/42] add toggles to disable atlas based drawing --- D3D11Engine/D3D11GraphicsEngine.cpp | 6 ++++-- D3D11Engine/GothicGraphicsState.h | 2 ++ D3D11Engine/ImGuiShim.cpp | 4 ++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 8ae326d2..6e4b2cd4 
100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -604,6 +604,8 @@ XRESULT D3D11GraphicsEngine::Init() { // to support more memory intensive features, even on less than 4GB cards, by streaming in the necessary tiles. SupportTextureAtlases = true; Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows = SupportTextureAtlases; + Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs = SupportTextureAtlases; + Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh = SupportTextureAtlases; } } @@ -8582,7 +8584,7 @@ void D3D11GraphicsEngine::BuildSceneTextureAtlasses() { m_TextureAtlasLookup.clear(); m_AtlasDrawGroups.clear(); - if ( !SupportTextureAtlases ) { + if ( !SupportTextureAtlases || !Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs ) { return; } @@ -8682,7 +8684,7 @@ void D3D11GraphicsEngine::BuildWorldMeshTextureAtlasses() { m_WorldMeshGlobalInstanceIdBuffer.reset(); m_WorldMeshSubmeshBuffer.reset(); - if ( !SupportTextureAtlases ) { + if ( !SupportTextureAtlases || !Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh ) { return; } diff --git a/D3D11Engine/GothicGraphicsState.h b/D3D11Engine/GothicGraphicsState.h index 0dd4b3ec..2d3ee31d 100644 --- a/D3D11Engine/GothicGraphicsState.h +++ b/D3D11Engine/GothicGraphicsState.h @@ -961,6 +961,8 @@ struct GothicRendererSettings { bool UseShadowAtlas; bool ForceFeatureLevel10; bool StreamingResourcesSupported; + bool EnableAtlasStaticVobs; + bool EnableAtlasWorldMesh; } FeatureSet; } DebugSettings; }; diff --git a/D3D11Engine/ImGuiShim.cpp b/D3D11Engine/ImGuiShim.cpp index 6b480e98..c04d0131 100644 --- a/D3D11Engine/ImGuiShim.cpp +++ b/D3D11Engine/ImGuiShim.cpp @@ -1352,6 +1352,10 @@ void RenderAdvancedColumn2( GothicRendererSettings& settings, GothicAPI* gapi ) 
ImGui::SetItemTooltip("Enables a less intensive but lower quality shadow solution."); ImGui::Checkbox("Force Feature Level 10", &settings.DebugSettings.FeatureSet.ForceFeatureLevel10 ); ImGui::SetItemTooltip("Force DirectX 10 era feature support. Requires restart."); + ImGui::Checkbox("Atlas Static Vobs", &settings.DebugSettings.FeatureSet.EnableAtlasStaticVobs ); + ImGui::SetItemTooltip("Enable texture atlas based rendering for static vobs (experimental, requires world reload)"); + ImGui::Checkbox("Atlas World Mesh", &settings.DebugSettings.FeatureSet.EnableAtlasWorldMesh ); + ImGui::SetItemTooltip("Enable texture atlas based rendering for world mesh (experimental, requires world reload)"); ImGui::EndTabItem(); } From a2d6413ee3f7cf6508eb1074807bd8b6e5ddc964 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Thu, 12 Mar 2026 16:35:57 +0100 Subject: [PATCH 16/42] ensure correct Mip lod bias when upscaling when rendering using Atlas --- D3D11Engine/D3D11GraphicsEngine.cpp | 7 ++++--- D3D11Engine/D3D11GraphicsEngine.h | 1 + D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl | 10 +++++++--- D3D11Engine/Shaders/PS_WorldAtlas.hlsl | 21 +++++++++++++-------- 4 files changed, 25 insertions(+), 14 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 6e4b2cd4..1ab3ae98 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -285,6 +285,7 @@ void D3D11GraphicsEngine::CreateAndBindDefaultSampler() { float scaleRatio = static_cast(GetScaledResolution().x) / static_cast(GetBackbufferResolution().x); // Calculate raw bias, but clamp it to a maximum of 0.0f to protect Supersampling float mipBias = std::min(0.0f, std::log2(scaleRatio)); + m_SamplerMipBias = mipBias; D3D11_SAMPLER_DESC samplerDesc{}; samplerDesc.Filter = D3D11_FILTER_ANISOTROPIC; @@ -2998,13 +2999,13 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { GetContext()->CSSetSamplers( 0, 1, 
DefaultSamplerState.GetAddressOf() ); // Update view distances - InfiniteRangeConstantBuffer->UpdateBuffer( float4( FLT_MAX, 0, 0, 0 ).toPtr() ); + InfiniteRangeConstantBuffer->UpdateBuffer( float4( FLT_MAX, m_SamplerMipBias, 0, 0 ).toPtr() ); OutdoorSmallVobsConstantBuffer->UpdateBuffer( float4( rendererState.RendererSettings.OutdoorSmallVobDrawRadius, - 0, 0, 0 ).toPtr() ); + m_SamplerMipBias, 0, 0 ).toPtr() ); OutdoorVobsConstantBuffer->UpdateBuffer( float4( rendererState.RendererSettings.OutdoorVobDrawRadius, - 0, 0, 0 ).toPtr() ); + m_SamplerMipBias, 0, 0 ).toPtr() ); rendererState.RasterizerState.FrontCounterClockwise = false; diff --git a/D3D11Engine/D3D11GraphicsEngine.h b/D3D11Engine/D3D11GraphicsEngine.h index de2e7cd1..72d68802 100644 --- a/D3D11Engine/D3D11GraphicsEngine.h +++ b/D3D11Engine/D3D11GraphicsEngine.h @@ -518,6 +518,7 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { /** If true, we will save a screenshot after the next frame */ bool SaveScreenshotNextFrame; + float m_SamplerMipBias = 0.0f; bool m_flipWithTearing; bool m_swapchainflip; bool m_lowlatency; diff --git a/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl b/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl index 05c38975..fd167e57 100644 --- a/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl +++ b/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl @@ -20,7 +20,8 @@ cbuffer MI_MaterialInfo : register( b2 ) cbuffer DIST_Distance : register( b3 ) { float DIST_DrawDistance; - float3 DIST_Pad; + float DIST_LodBias; + float2 DIST_Pad; } //-------------------------------------------------------------------------------------- @@ -80,8 +81,11 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET float slice = Input.vTexcoord3D.z; float2 atlasScale = Input.vAtlasRect.zw - Input.vAtlasRect.xy; // (uEnd-uStart, vEnd-vStart) - float2 gradX = ddx(rawUV) * atlasScale; - float2 gradY = ddy(rawUV) * atlasScale; + // SampleGrad ignores sampler MipLODBias, so we manually apply the LOD bias + // (needed for FSR 
upscaling to produce sharp textures at lower resolutions) + float biasFactor = exp2(DIST_LodBias); + float2 gradX = ddx(rawUV) * atlasScale * biasFactor; + float2 gradY = ddy(rawUV) * atlasScale * biasFactor; float2 atlasUV = Input.vAtlasRect.xy + frac(rawUV) * atlasScale; float4 color = TX_AtlasArray.SampleGrad(SS_Linear, float3(atlasUV, slice), gradX, gradY); diff --git a/D3D11Engine/Shaders/PS_WorldAtlas.hlsl b/D3D11Engine/Shaders/PS_WorldAtlas.hlsl index 4845eea0..33323cde 100644 --- a/D3D11Engine/Shaders/PS_WorldAtlas.hlsl +++ b/D3D11Engine/Shaders/PS_WorldAtlas.hlsl @@ -21,7 +21,8 @@ cbuffer MI_MaterialInfo : register( b2 ) cbuffer DIST_Distance : register( b3 ) { float DIST_DrawDistance; - float3 DIST_Pad; + float DIST_LodBias; + float2 DIST_Pad; } //-------------------------------------------------------------------------------------- @@ -75,13 +76,16 @@ float2 CalculateVelocity(float4 currClipPos, float4 prevClipPos) // so that at higher mips the border grows to prevent bilinear bleed into neighbors. 
static const float ATLAS_SIZE = 2048.0; -float4 SampleAtlas(Texture2DArray atlas, SamplerState ss, float3 rawUVSlice, float4 atlasRect) +float4 SampleAtlas(Texture2DArray atlas, SamplerState ss, float3 rawUVSlice, float4 atlasRect, float lodBias) { float2 rawUV = rawUVSlice.xy; float slice = rawUVSlice.z; float2 scale = atlasRect.zw - atlasRect.xy; - float2 gradX = ddx(rawUV) * scale; - float2 gradY = ddy(rawUV) * scale; + // SampleGrad ignores sampler MipLODBias, so we manually apply the LOD bias + // (needed for FSR upscaling to produce sharp textures at lower resolutions) + float biasFactor = exp2(lodBias); + float2 gradX = ddx(rawUV) * scale * biasFactor; + float2 gradY = ddy(rawUV) * scale * biasFactor; // Compute approximate mip level from gradients float2 dxTex = gradX * ATLAS_SIZE; @@ -106,7 +110,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET output.vReactiveMask = 0.0f; // --- Diffuse --- - float4 color = SampleAtlas(TX_AtlasDiffuse, SS_Linear, Input.vTexcoord3D, Input.vAtlasRect); + float4 color = SampleAtlas(TX_AtlasDiffuse, SS_Linear, Input.vTexcoord3D, Input.vAtlasRect, DIST_LodBias); // Alpha test if (Input.vFlags & 4u) @@ -126,8 +130,9 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET float2 rawUV = Input.vNormalAtlas3D.xy; float slice = Input.vNormalAtlas3D.z; float2 scale = nrmAtlasRect.zw - nrmAtlasRect.xy; - float2 gradX = ddx(rawUV) * scale; - float2 gradY = ddy(rawUV) * scale; + float biasFactor = exp2(DIST_LodBias); + float2 gradX = ddx(rawUV) * scale * biasFactor; + float2 gradY = ddy(rawUV) * scale * biasFactor; float2 atlasUV = nrmAtlasRect.xy + frac(rawUV) * scale; nrm = perturb_normal_from_grad( @@ -148,7 +153,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET float4 fx = 1.0f; if (Input.vFlags & 2u) { - fx = SampleAtlas(TX_AtlasFx, SS_Linear, Input.vFxAtlas3D, Input.vFxAtlasRect); + fx = SampleAtlas(TX_AtlasFx, SS_Linear, Input.vFxAtlas3D, Input.vFxAtlasRect, DIST_LodBias); } output.vDiffuse = 
float4(color.rgb, Input.vDiffuse.y); From 8cbf51cdf30098375ae7b70048a451f8c5daacc3 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Thu, 12 Mar 2026 16:49:19 +0100 Subject: [PATCH 17/42] don't hard-code atlas size --- D3D11Engine/Shaders/PS_WorldAtlas.hlsl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/D3D11Engine/Shaders/PS_WorldAtlas.hlsl b/D3D11Engine/Shaders/PS_WorldAtlas.hlsl index 33323cde..08910c38 100644 --- a/D3D11Engine/Shaders/PS_WorldAtlas.hlsl +++ b/D3D11Engine/Shaders/PS_WorldAtlas.hlsl @@ -74,8 +74,6 @@ float2 CalculateVelocity(float4 currClipPos, float4 prevClipPos) // Helper: sample from an atlas Texture2DArray with correct mip via SampleGrad + frac() // Clamps the final atlas UV inside the entry boundary, scaled by the mip level // so that at higher mips the border grows to prevent bilinear bleed into neighbors. -static const float ATLAS_SIZE = 2048.0; - float4 SampleAtlas(Texture2DArray atlas, SamplerState ss, float3 rawUVSlice, float4 atlasRect, float lodBias) { float2 rawUV = rawUVSlice.xy; @@ -87,14 +85,18 @@ float4 SampleAtlas(Texture2DArray atlas, SamplerState ss, float3 rawUVSlice, flo float2 gradX = ddx(rawUV) * scale * biasFactor; float2 gradY = ddy(rawUV) * scale * biasFactor; + // Query actual atlas dimensions instead of assuming a fixed size + float atlasW, atlasH, atlasSlices; + atlas.GetDimensions(atlasW, atlasH, atlasSlices); + // Compute approximate mip level from gradients - float2 dxTex = gradX * ATLAS_SIZE; - float2 dyTex = gradY * ATLAS_SIZE; + float2 dxTex = gradX * atlasW; + float2 dyTex = gradY * atlasH; float maxSq = max(dot(dxTex, dxTex), dot(dyTex, dyTex)); float mipLevel = max(0.0, 0.5 * log2(maxSq)); // Scale the half-texel border by 2^mip so it covers the filter footprint at that level - float border = (0.5 / ATLAS_SIZE) * exp2(ceil(mipLevel)); + float2 border = (0.5 / float2(atlasW, atlasH)) * exp2(ceil(mipLevel)); float2 atlasUV = atlasRect.xy + 
frac(rawUV) * scale; atlasUV = clamp(atlasUV, atlasRect.xy + border, atlasRect.zw - border); From f077a4f9854d1b878333c3a317a0a225704b75b3 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sat, 14 Mar 2026 09:28:52 +0100 Subject: [PATCH 18/42] reduce GodRay banding and visual artifacts by increasing samples to 64 and adding dithering, ensure clamping behavior for Godray zoom --- D3D11Engine/D3D11GraphicsEngine.h | 1 - 1 file changed, 1 deletion(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.h b/D3D11Engine/D3D11GraphicsEngine.h index 72d68802..6a68d3e6 100644 --- a/D3D11Engine/D3D11GraphicsEngine.h +++ b/D3D11Engine/D3D11GraphicsEngine.h @@ -391,7 +391,6 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { RenderToTextureBuffer* GetVelocityBuffer() const { return VelocityBuffer.get(); } const XMFLOAT4X4& GetPrevViewProjMatrix() const { return m_PrevViewProjMatrix; } - auto GetClampSamplerState() -> auto { return ClampSamplerState.Get(); } auto GetCubeSamplerState() -> auto { return CubeSamplerState.Get(); } auto GetLinearSamplerState() -> auto { return LinearSamplerState.Get(); } From 03a6606f8842705dd651d1a2bfb236a63ba1af0b Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sat, 14 Mar 2026 20:03:41 +0100 Subject: [PATCH 19/42] fallback textures --- D3D11Engine/D3D11GraphicsEngine.cpp | 36 +++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 1ab3ae98..88688309 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -3030,7 +3030,7 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { GetContext()->ClearRenderTargetView( graph.GetPhysicalTexture( colorResource )->GetRenderTargetView().Get(), reinterpret_cast(&fogColor) ); }; }); - + if ( rendererState.RendererSettings.DrawSky ) { graph.AddPass( L"Draw Sky", [&]( 
RGBuilder& builder, RenderPass& pass ) { //// Setup / Declare @@ -3038,13 +3038,13 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { //albedoTarget = builder.CreateTexture( albedoDesc ); builder.Write( colorResource ); - pass.m_executeCallback = [this, colorResource](const RenderGraph& graph)->void { + pass.m_executeCallback = [this, colorResource]( const RenderGraph& graph )->void { // Draw back of the sky if outdoor GetContext()->OMSetRenderTargets( 1, graph.GetPhysicalTexture( colorResource )->GetRenderTargetView().GetAddressOf(), nullptr ); - + DrawSky(); }; - }); + } ); } RGResourceHandle normalsResource; @@ -3148,8 +3148,8 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { m_FrameLights.clear(); } }; - }); - + }); + graph.AddPass( L"Draw Frame AlphaMeshes", [&]( RGBuilder& builder, RenderPass& pass ) { // Setup / Declare builder.Write( backBufferHandle ); @@ -8600,20 +8600,36 @@ void D3D11GraphicsEngine::BuildSceneTextureAtlasses() { for ( auto vobInfo : m_StaticVobs ) { for ( auto& byTex : reinterpret_cast(vobInfo->VisualInfo)->MeshesByTexture ) { zCTexture* tex = byTex.first.Texture; - if ( !tex || !seenTextures.insert( tex ).second ) + if ( !tex ) { + tex = byTex.first.Material->GetTexture(); + } + + if ( !tex ) { + tex = byTex.first.Material->GetAniTexture(); + } + + if ( !tex || !seenTextures.insert( tex ).second ) { + LogError() << "Texture not found for visual " << vobInfo->VisualInfo->VisualName; continue; // skip nulls and duplicates + } auto cachedState = tex->CacheIn( -1 ); - if ( cachedState != zRES_CACHED_IN ) + if ( cachedState != zRES_CACHED_IN ) { + LogError() << "Texture " << tex->GetName() << " was not cached in"; continue; + } auto surface = tex->GetSurface(); - if ( !surface || !surface->IsSurfaceReady() ) + if ( !surface || !surface->IsSurfaceReady() ) { + LogError() << "Texture " << tex->GetName() << " surface not ready"; continue; + } auto engineTex = surface->GetEngineTexture(); - if ( !engineTex ) + if ( !engineTex ) { 
+ LogError() << "Texture " << tex->GetName() << " no engine texture"; continue; + } D3D11_TEXTURE2D_DESC desc; engineTex->GetTextureObject()->GetDesc( &desc ); From 3b2a119d808018ca7f2c95acff78835b78dde979 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sat, 14 Mar 2026 20:08:44 +0100 Subject: [PATCH 20/42] note --- D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl b/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl index 92e8103d..69449c11 100644 --- a/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl +++ b/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl @@ -699,6 +699,9 @@ float4 PSMain(PS_INPUT Input) : SV_TARGET // If we dont have a normal, just return the diffuse color if (gb2.w < 0.001f) + // FIXME: This is just so the sky is correctly rendered when atmospheric scattering is enabled + // Ideally we should draw the sky in a separate forward after applying the lighting, but that breaks other stuff. 
+ // For now, we need the alpha channel and can't really make use of Octahedral encoding for the normals if we want to keep the sky rendering working, so we'll just return the diffuse color here and skip all lighting calculations for pixels without normals (mostly sky) return float4(diffuse.rgb, 1); // Decode the view-space normal back From 324ff5c2077688541e49021a76bcbabb7848dbbe Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sat, 14 Mar 2026 20:31:11 +0100 Subject: [PATCH 21/42] higher precision normals using 10-bit normals --- D3D11Engine/D3D11GraphicsEngine.cpp | 2 +- D3D11Engine/D3D11NVHBAO.cpp | 2 +- D3D11Engine/Shaders/DS_Defines.h | 18 +++++++----------- D3D11Engine/Shaders/PS_AtmosphereGround.hlsl | 3 +-- .../Shaders/PS_DS_AtmosphericScattering.hlsl | 7 ++++--- D3D11Engine/Shaders/PS_DS_PointLight.hlsl | 4 ++-- .../Shaders/PS_DS_PointLightDynShadow.hlsl | 4 ++-- D3D11Engine/Shaders/PS_Diffuse.hlsl | 3 +-- D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl | 3 +-- D3D11Engine/Shaders/PS_Grass.hlsl | 3 +-- D3D11Engine/Shaders/PS_World.hlsl | 3 +-- D3D11Engine/Shaders/PS_WorldAtlas.hlsl | 3 +-- 12 files changed, 23 insertions(+), 32 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 88688309..52717ead 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -3053,7 +3053,7 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { graph.AddPass( L"G-Buffer Pass", [&]( RGBuilder& builder, RenderPass& pass ) { // Setup / Declare auto size = GetResolution(); - normalsResource = builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R8G8B8A8_SNORM, L"GBufferNormals" }); + normalsResource = builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R10G10B10A2_UNORM, L"GBufferNormals" }); specularResource = builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R16G16_FLOAT, 
L"GBufferSpecular" }); reactiveMaskResource = builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R8_UNORM, L"ReactiveMask" }); diff --git a/D3D11Engine/D3D11NVHBAO.cpp b/D3D11Engine/D3D11NVHBAO.cpp index a10f168b..f9871ccb 100644 --- a/D3D11Engine/D3D11NVHBAO.cpp +++ b/D3D11Engine/D3D11NVHBAO.cpp @@ -58,7 +58,7 @@ XRESULT D3D11NVHBAO::Render( Input.DepthData.ProjectionMatrix.Layout = GFSDK_SSAO_COLUMN_MAJOR_ORDER; Input.DepthData.MetersToViewSpaceUnits = settings.MetersToViewSpaceUnits; - Input.NormalData.Enable = true; + Input.NormalData.Enable = false; Input.NormalData.pFullResNormalTextureSRV = pFullResNormalTexSRV.Get(); auto identity = XMMatrixIdentity(); Input.NormalData.WorldToViewMatrix.Data = GFSDK_SSAO_Float4x4( reinterpret_cast(&identity) ); // We already have them in view-space diff --git a/D3D11Engine/Shaders/DS_Defines.h b/D3D11Engine/Shaders/DS_Defines.h index 5f31d5fe..198b3bad 100644 --- a/D3D11Engine/Shaders/DS_Defines.h +++ b/D3D11Engine/Shaders/DS_Defines.h @@ -16,18 +16,14 @@ struct DEFERRED_PS_OUTPUT_ALPHA_TO_COVERAGE -float2 EncodeNormal(float3 n) +// Encode a normalized view-space normal [-1,1] to UNORM [0,1] for R10G10B10A2_UNORM storage +float4 EncodeNormalGBuffer(float3 n, float alpha) { - float f = sqrt(8*n.z+8); - return n.xy / f + 0.5; + return float4(n * 0.5 + 0.5, alpha); } -float3 DecodeNormal(float2 enc) + +// Decode a UNORM [0,1] sample back to a normalized view-space normal [-1,1] +float3 DecodeNormalGBuffer(float3 encoded) { - float2 fenc = enc.xy*4-2; - float f = dot(fenc,fenc); - float g = sqrt(1-f/4); - float3 n; - n.xy = fenc*g; - n.z = 1-f/2; - return n; + return normalize(encoded * 2.0 - 1.0); } diff --git a/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl b/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl index e36f2496..e1b99a31 100644 --- a/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl +++ b/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl @@ -83,8 +83,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : 
SV_TARGET DEFERRED_PS_OUTPUT output; output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); - output.vNrm.xyz = nrm; - output.vNrm.w = 1.0f; + output.vNrm = EncodeNormalGBuffer(nrm, 1.0f); output.vSI_SP.x = MI_SpecularIntensity; output.vSI_SP.y = MI_SpecularPower; diff --git a/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl b/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl index 69449c11..cdef23cf 100644 --- a/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl +++ b/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl @@ -701,11 +701,12 @@ float4 PSMain(PS_INPUT Input) : SV_TARGET if (gb2.w < 0.001f) // FIXME: This is just so the sky is correctly rendered when atmospheric scattering is enabled // Ideally we should draw the sky in a separate forward after applying the lighting, but that breaks other stuff. - // For now, we need the alpha channel and can't really make use of Octahedral encoding for the normals if we want to keep the sky rendering working, so we'll just return the diffuse color here and skip all lighting calculations for pixels without normals (mostly sky) + // For now, we need the alpha channel (2-bit in R10G10B10A2_UNORM) to keep the sky rendering working, + // so we can't use Octahedral encoding. We return the diffuse color here and skip lighting for sky pixels. 
return float4(diffuse.rgb, 1); - // Decode the view-space normal back - float3 normal = normalize(gb2.xyz); + // Decode the view-space normal back from R10G10B10A2_UNORM + float3 normal = DecodeNormalGBuffer(gb2.xyz); // Get specular parameters float4 gb3 = TX_SI_SP.Sample(SS_Linear, uv); diff --git a/D3D11Engine/Shaders/PS_DS_PointLight.hlsl b/D3D11Engine/Shaders/PS_DS_PointLight.hlsl index 9c7f3495..196ab2db 100644 --- a/D3D11Engine/Shaders/PS_DS_PointLight.hlsl +++ b/D3D11Engine/Shaders/PS_DS_PointLight.hlsl @@ -102,8 +102,8 @@ float4 PSMain( PS_INPUT Input ) : SV_TARGET // Get the second GBuffer float4 gb2 = TX_Nrm.Sample(SS_Linear, uv); - // Decode the view-space normal back - float3 normal = normalize(gb2.xyz); + // Decode the view-space normal back from R10G10B10A2_UNORM + float3 normal = DecodeNormalGBuffer(gb2.xyz); // Get specular parameters float4 gb3 = TX_SI_SP.Sample(SS_Linear, uv); diff --git a/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl b/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl index 8c80e67e..910c25b6 100644 --- a/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl +++ b/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl @@ -146,8 +146,8 @@ float4 PSMain( PS_INPUT Input ) : SV_TARGET // Get the second GBuffer float4 gb2 = TX_Nrm.Sample(SS_Linear, uv); - // Decode the view-space normal back - float3 normal = normalize(gb2.xyz); + // Decode the view-space normal back from R10G10B10A2_UNORM + float3 normal = DecodeNormalGBuffer(gb2.xyz); // Get specular parameters float4 gb3 = TX_SI_SP.Sample(SS_Linear, uv); diff --git a/D3D11Engine/Shaders/PS_Diffuse.hlsl b/D3D11Engine/Shaders/PS_Diffuse.hlsl index 9af0b79c..ed335666 100644 --- a/D3D11Engine/Shaders/PS_Diffuse.hlsl +++ b/D3D11Engine/Shaders/PS_Diffuse.hlsl @@ -120,8 +120,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET //output.vDiffuse = float4(Input.vTexcoord2, 0, 1); //output.vDiffuse = float4(Input.vNormalVS, 1); - output.vNrm.xyz = nrm; - output.vNrm.w = 1.0f; + 
output.vNrm = EncodeNormalGBuffer(nrm, 1.0f); output.vSI_SP.x = MI_SpecularIntensity * fx.r; output.vSI_SP.y = MI_SpecularPower * fx.g; diff --git a/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl b/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl index fd167e57..11affa27 100644 --- a/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl +++ b/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl @@ -102,8 +102,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); - output.vNrm.xyz = nrm; - output.vNrm.w = 1.0f; + output.vNrm = EncodeNormalGBuffer(nrm, 1.0f); output.vSI_SP.x = MI_SpecularIntensity * fx.r; output.vSI_SP.y = MI_SpecularPower * fx.g; diff --git a/D3D11Engine/Shaders/PS_Grass.hlsl b/D3D11Engine/Shaders/PS_Grass.hlsl index 5f12bed1..f393d5a0 100644 --- a/D3D11Engine/Shaders/PS_Grass.hlsl +++ b/D3D11Engine/Shaders/PS_Grass.hlsl @@ -65,8 +65,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET DEFERRED_PS_OUTPUT output; output.vDiffuse = float4(color.rgb, 1); - output.vNrm.xyz = normalize(Input.vNormalVS); - output.vNrm.w = 1.0f; + output.vNrm = EncodeNormalGBuffer(normalize(Input.vNormalVS), 1.0f); output.vSI_SP.xy = 0; diff --git a/D3D11Engine/Shaders/PS_World.hlsl b/D3D11Engine/Shaders/PS_World.hlsl index 661c3785..d0b8eb62 100644 --- a/D3D11Engine/Shaders/PS_World.hlsl +++ b/D3D11Engine/Shaders/PS_World.hlsl @@ -85,8 +85,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET DEFERRED_PS_OUTPUT output; output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); - output.vNrm.xyz = normalize(Input.vNormalVS); - output.vNrm.w = 1.0f; + output.vNrm = EncodeNormalGBuffer(normalize(Input.vNormalVS), 1.0f); output.vSI_SP.x = MI_SpecularIntensity; output.vSI_SP.y = MI_SpecularPower; diff --git a/D3D11Engine/Shaders/PS_WorldAtlas.hlsl b/D3D11Engine/Shaders/PS_WorldAtlas.hlsl index 08910c38..415c4f17 100644 --- a/D3D11Engine/Shaders/PS_WorldAtlas.hlsl +++ b/D3D11Engine/Shaders/PS_WorldAtlas.hlsl @@ -160,8 +160,7 @@ 
DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); - output.vNrm.xyz = nrm; - output.vNrm.w = 1.0f; + output.vNrm = EncodeNormalGBuffer(nrm, 1.0f); output.vSI_SP.x = MI_SpecularIntensity * fx.r; output.vSI_SP.y = MI_SpecularPower * fx.g; From 19c5439a4f0305fe24fb24e03a39bc4ae28b1d25 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sat, 14 Mar 2026 20:55:35 +0100 Subject: [PATCH 22/42] increase precision of normals by using RG16_SNORM with octahedral encoding, do "sky" tests using depth copy --- D3D11Engine/D3D11GraphicsEngine.cpp | 8 +++--- D3D11Engine/D3D11PFX_GodRays.cpp | 4 +-- D3D11Engine/D3D11PFX_GodRays.h | 2 +- D3D11Engine/D3D11PfxRenderer.cpp | 4 +-- D3D11Engine/D3D11PfxRenderer.h | 2 +- D3D11Engine/Shaders/DS_Defines.h | 25 +++++++++++++------ D3D11Engine/Shaders/PS_AtmosphereGround.hlsl | 2 +- .../Shaders/PS_DS_AtmosphericScattering.hlsl | 21 +++++++--------- D3D11Engine/Shaders/PS_DS_PointLight.hlsl | 6 ++--- .../Shaders/PS_DS_PointLightDynShadow.hlsl | 6 ++--- D3D11Engine/Shaders/PS_Diffuse.hlsl | 2 +- D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl | 2 +- D3D11Engine/Shaders/PS_Grass.hlsl | 2 +- D3D11Engine/Shaders/PS_PFX_GodRayMask.hlsl | 7 +++--- D3D11Engine/Shaders/PS_World.hlsl | 2 +- D3D11Engine/Shaders/PS_WorldAtlas.hlsl | 2 +- 16 files changed, 51 insertions(+), 46 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 52717ead..f10f8191 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -3053,7 +3053,7 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { graph.AddPass( L"G-Buffer Pass", [&]( RGBuilder& builder, RenderPass& pass ) { // Setup / Declare auto size = GetResolution(); - normalsResource = builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R10G10B10A2_UNORM, L"GBufferNormals" }); + normalsResource = 
builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R16G16_SNORM, L"GBufferNormals" }); specularResource = builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R16G16_FLOAT, L"GBufferSpecular" }); reactiveMaskResource = builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R8_UNORM, L"ReactiveMask" }); @@ -3308,19 +3308,17 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { Engine::GAPI->GetLoadedWorldInfo()->BspTree->GetBspTreeMode() == zBSP_MODE_OUTDOOR) { graph.AddPass( L"Draw Godrays", [&]( RGBuilder& builder, RenderPass& pass ) { - builder.Read( normalsResource ); builder.Read( backBufferHandle ); builder.Write( backBufferHandle ); - pass.m_executeCallback = [this, backBufferHandle, normalsResource](const RenderGraph& graph) { + pass.m_executeCallback = [this, backBufferHandle](const RenderGraph& graph) { // Unbind temporary backbuffer copy Microsoft::WRL::ComPtr srv; GetContext()->PSSetShaderResources( 5, 1, srv.GetAddressOf() ); auto backbufferResource = graph.GetPhysicalTexture(backBufferHandle); - auto normalsTexture = graph.GetPhysicalTexture(normalsResource); - PfxRenderer->RenderGodRays(backbufferResource->GetShaderResView().Get(), normalsTexture->GetShaderResView().Get()); + PfxRenderer->RenderGodRays(backbufferResource->GetShaderResView().Get(), GetDepthBufferCopy()->GetShaderResView().Get()); // Godrays bind a different sampler GetContext()->PSSetSamplers( 0, 1, DefaultSamplerState.GetAddressOf() ); }; diff --git a/D3D11Engine/D3D11PFX_GodRays.cpp b/D3D11Engine/D3D11PFX_GodRays.cpp index 87319373..96c02f70 100644 --- a/D3D11Engine/D3D11PFX_GodRays.cpp +++ b/D3D11Engine/D3D11PFX_GodRays.cpp @@ -19,7 +19,7 @@ D3D11PFX_GodRays::~D3D11PFX_GodRays() {} /** Draws this effect to the given buffer */ XRESULT D3D11PFX_GodRays::Render( ID3D11ShaderResourceView* backbuffer, - ID3D11ShaderResourceView* normals ) { + ID3D11ShaderResourceView* depth ) { if ( 
Engine::GAPI->GetSky()->GetAtmoshpereSettings().LightDirection.y <= 0 ) return XR_SUCCESS; // Don't render the godrays in the night-time @@ -82,7 +82,7 @@ XRESULT D3D11PFX_GodRays::Render( ID3D11ShaderResourceView* srvs[2] { backbuffer, - normals, + depth, }; engine->GetContext()->PSSetShaderResources( 0, 2, srvs ); diff --git a/D3D11Engine/D3D11PFX_GodRays.h b/D3D11Engine/D3D11PFX_GodRays.h index f5f97fe1..20b51ed1 100644 --- a/D3D11Engine/D3D11PFX_GodRays.h +++ b/D3D11Engine/D3D11PFX_GodRays.h @@ -9,6 +9,6 @@ class D3D11PFX_GodRays : /** Draws this effect to the given buffer */ XRESULT Render( RenderToTextureBuffer* fxbuffer ) override { return XR_FAILED; } - XRESULT Render( ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* normals ); + XRESULT Render( ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* depth ); }; diff --git a/D3D11Engine/D3D11PfxRenderer.cpp b/D3D11Engine/D3D11PfxRenderer.cpp index 7a60f8d2..848763be 100644 --- a/D3D11Engine/D3D11PfxRenderer.cpp +++ b/D3D11Engine/D3D11PfxRenderer.cpp @@ -68,8 +68,8 @@ XRESULT D3D11PfxRenderer::RenderHeightfog() { } /** Renders the godrays-Effect */ -XRESULT D3D11PfxRenderer::RenderGodRays(ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* normals) { - return FX_GodRays->Render( backbuffer , normals ); +XRESULT D3D11PfxRenderer::RenderGodRays(ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* depth) { + return FX_GodRays->Render( backbuffer , depth ); } /** Renders the depth-of-field effect */ diff --git a/D3D11Engine/D3D11PfxRenderer.h b/D3D11Engine/D3D11PfxRenderer.h index e1fafcb1..10f38389 100644 --- a/D3D11Engine/D3D11PfxRenderer.h +++ b/D3D11Engine/D3D11PfxRenderer.h @@ -47,7 +47,7 @@ class D3D11PfxRenderer { XRESULT RenderSimpleSharpen( const Microsoft::WRL::ComPtr& input, INT2 inputSize, const Microsoft::WRL::ComPtr& output, INT2 outputSize, RenderToTextureBuffer& intermediateBuffer ); /** Renders the godrays-Effect */ - XRESULT 
RenderGodRays(ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* normals); + XRESULT RenderGodRays(ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* depth); /** Renders the depth-of-field effect */ XRESULT RenderDepthOfField(ID3D11ShaderResourceView* backbuffer); diff --git a/D3D11Engine/Shaders/DS_Defines.h b/D3D11Engine/Shaders/DS_Defines.h index 198b3bad..a162a26a 100644 --- a/D3D11Engine/Shaders/DS_Defines.h +++ b/D3D11Engine/Shaders/DS_Defines.h @@ -1,7 +1,7 @@ struct DEFERRED_PS_OUTPUT { float4 vDiffuse : SV_TARGET0; - float4 vNrm : SV_TARGET1; + float2 vNrm : SV_TARGET1; float2 vSI_SP : SV_TARGET2; float2 vVelocity : SV_TARGET3; // Screen-space velocity for motion vectors float vReactiveMask : SV_TARGET4; // Screen-space velocity for motion vectors @@ -14,16 +14,25 @@ struct DEFERRED_PS_OUTPUT_ALPHA_TO_COVERAGE uint fCoverage : SV_Coverage; }; +// Octahedral encoding: map a unit normal to [-1,1]^2 for R16G16_SNORM storage +// Reference: "A Survey of Efficient Representations for Independent Unit Vectors" (Cigolle et al. 2014) +float2 OctWrap(float2 v) +{ + return (1.0 - abs(v.yx)) * (v.xy >= 0.0 ? 1.0 : -1.0); +} - -// Encode a normalized view-space normal [-1,1] to UNORM [0,1] for R10G10B10A2_UNORM storage -float4 EncodeNormalGBuffer(float3 n, float alpha) +float2 EncodeNormalGBuffer(float3 n) { - return float4(n * 0.5 + 0.5, alpha); + n /= (abs(n.x) + abs(n.y) + abs(n.z)); + n.xy = n.z >= 0.0 ? n.xy : OctWrap(n.xy); + return n.xy; } -// Decode a UNORM [0,1] sample back to a normalized view-space normal [-1,1] -float3 DecodeNormalGBuffer(float3 encoded) +// Decode octahedral [-1,1]^2 back to a unit normal +float3 DecodeNormalGBuffer(float2 encoded) { - return normalize(encoded * 2.0 - 1.0); + float3 n; + n.z = 1.0 - abs(encoded.x) - abs(encoded.y); + n.xy = n.z >= 0.0 ? 
encoded.xy : OctWrap(encoded.xy); + return normalize(n); } diff --git a/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl b/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl index e1b99a31..6bba1c70 100644 --- a/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl +++ b/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl @@ -83,7 +83,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET DEFERRED_PS_OUTPUT output; output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); - output.vNrm = EncodeNormalGBuffer(nrm, 1.0f); + output.vNrm = EncodeNormalGBuffer(nrm); output.vSI_SP.x = MI_SpecularIntensity; output.vSI_SP.y = MI_SpecularPower; diff --git a/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl b/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl index cdef23cf..e2e2693f 100644 --- a/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl +++ b/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl @@ -694,19 +694,17 @@ float4 PSMain(PS_INPUT Input) : SV_TARGET float4 diffuse = TX_Diffuse.Sample(SS_Linear, uv); float vertLighting = diffuse.a; - // Get the second GBuffer - float4 gb2 = TX_Nrm.Sample(SS_Linear, uv); - - // If we dont have a normal, just return the diffuse color - if (gb2.w < 0.001f) - // FIXME: This is just so the sky is correctly rendered when atmospheric scattering is enabled - // Ideally we should draw the sky in a separate forward after applying the lighting, but that breaks other stuff. - // For now, we need the alpha channel (2-bit in R10G10B10A2_UNORM) to keep the sky rendering working, - // so we can't use Octahedral encoding. We return the diffuse color here and skip lighting for sky pixels. 
+ // Sample depth first to detect sky pixels (reversed-Z: sky has depth == 0.0) + float expDepth = TX_Depth.Sample(SS_Linear, uv).r; + if (expDepth < 0.00001f) + // Sky pixel — no geometry was written, just return the diffuse (sky) color return float4(diffuse.rgb, 1); - // Decode the view-space normal back from R10G10B10A2_UNORM - float3 normal = DecodeNormalGBuffer(gb2.xyz); + // Get the second GBuffer + float2 gb2 = TX_Nrm.Sample(SS_Linear, uv).xy; + + // Decode the view-space normal from octahedral R16G16_SNORM + float3 normal = DecodeNormalGBuffer(gb2); // Get specular parameters float4 gb3 = TX_SI_SP.Sample(SS_Linear, uv); @@ -714,7 +712,6 @@ float4 PSMain(PS_INPUT Input) : SV_TARGET float specPower = gb3.y; // Reconstruct VS World Position from depth - float expDepth = TX_Depth.Sample(SS_Linear, uv).r; float3 vsPosition = VSPositionFromDepth(expDepth, uv); float3 wsPosition = mul(float4(vsPosition, 1), SQ_InvView).xyz; float3 V = normalize(-vsPosition); diff --git a/D3D11Engine/Shaders/PS_DS_PointLight.hlsl b/D3D11Engine/Shaders/PS_DS_PointLight.hlsl index 196ab2db..584e5087 100644 --- a/D3D11Engine/Shaders/PS_DS_PointLight.hlsl +++ b/D3D11Engine/Shaders/PS_DS_PointLight.hlsl @@ -100,10 +100,10 @@ float4 PSMain( PS_INPUT Input ) : SV_TARGET float4 diffuse = TX_Diffuse.Sample(SS_Linear, uv); // Get the second GBuffer - float4 gb2 = TX_Nrm.Sample(SS_Linear, uv); + float2 gb2 = TX_Nrm.Sample(SS_Linear, uv).xy; - // Decode the view-space normal back from R10G10B10A2_UNORM - float3 normal = DecodeNormalGBuffer(gb2.xyz); + // Decode the view-space normal from octahedral R16G16_SNORM + float3 normal = DecodeNormalGBuffer(gb2); // Get specular parameters float4 gb3 = TX_SI_SP.Sample(SS_Linear, uv); diff --git a/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl b/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl index 910c25b6..f2108a41 100644 --- a/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl +++ b/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl @@ -144,10 
+144,10 @@ float4 PSMain( PS_INPUT Input ) : SV_TARGET float4 diffuse = TX_Diffuse.Sample(SS_Linear, uv); // Get the second GBuffer - float4 gb2 = TX_Nrm.Sample(SS_Linear, uv); + float2 gb2 = TX_Nrm.Sample(SS_Linear, uv).xy; - // Decode the view-space normal back from R10G10B10A2_UNORM - float3 normal = DecodeNormalGBuffer(gb2.xyz); + // Decode the view-space normal from octahedral R16G16_SNORM + float3 normal = DecodeNormalGBuffer(gb2); // Get specular parameters float4 gb3 = TX_SI_SP.Sample(SS_Linear, uv); diff --git a/D3D11Engine/Shaders/PS_Diffuse.hlsl b/D3D11Engine/Shaders/PS_Diffuse.hlsl index ed335666..83f1a6c9 100644 --- a/D3D11Engine/Shaders/PS_Diffuse.hlsl +++ b/D3D11Engine/Shaders/PS_Diffuse.hlsl @@ -120,7 +120,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET //output.vDiffuse = float4(Input.vTexcoord2, 0, 1); //output.vDiffuse = float4(Input.vNormalVS, 1); - output.vNrm = EncodeNormalGBuffer(nrm, 1.0f); + output.vNrm = EncodeNormalGBuffer(nrm); output.vSI_SP.x = MI_SpecularIntensity * fx.r; output.vSI_SP.y = MI_SpecularPower * fx.g; diff --git a/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl b/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl index 11affa27..8ad84315 100644 --- a/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl +++ b/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl @@ -102,7 +102,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); - output.vNrm = EncodeNormalGBuffer(nrm, 1.0f); + output.vNrm = EncodeNormalGBuffer(nrm); output.vSI_SP.x = MI_SpecularIntensity * fx.r; output.vSI_SP.y = MI_SpecularPower * fx.g; diff --git a/D3D11Engine/Shaders/PS_Grass.hlsl b/D3D11Engine/Shaders/PS_Grass.hlsl index f393d5a0..a77ebbeb 100644 --- a/D3D11Engine/Shaders/PS_Grass.hlsl +++ b/D3D11Engine/Shaders/PS_Grass.hlsl @@ -65,7 +65,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET DEFERRED_PS_OUTPUT output; output.vDiffuse = float4(color.rgb, 1); - output.vNrm = 
EncodeNormalGBuffer(normalize(Input.vNormalVS), 1.0f); + output.vNrm = EncodeNormalGBuffer(normalize(Input.vNormalVS)); output.vSI_SP.xy = 0; diff --git a/D3D11Engine/Shaders/PS_PFX_GodRayMask.hlsl b/D3D11Engine/Shaders/PS_PFX_GodRayMask.hlsl index 864876a6..a1cbdb48 100644 --- a/D3D11Engine/Shaders/PS_PFX_GodRayMask.hlsl +++ b/D3D11Engine/Shaders/PS_PFX_GodRayMask.hlsl @@ -8,7 +8,7 @@ SamplerState SS_Linear : register( s0 ); SamplerState SS_samMirror : register( s1 ); Texture2D TX_Texture0 : register( t0 ); -Texture2D TX_Texture1 : register( t1 ); +Texture2D TX_Depth : register( t1 ); //-------------------------------------------------------------------------------------- // Input / Output structures @@ -26,9 +26,10 @@ struct PS_INPUT float4 PSMain( PS_INPUT Input ) : SV_TARGET { float4 color = TX_Texture0.Sample(SS_Linear, Input.vTexcoord); - float4 gb2 = TX_Texture1.Sample(SS_Linear, Input.vTexcoord); - if(gb2.w < 0.001f) + // Sky detection via depth buffer (reversed-Z: sky has depth == 0.0) + float depth = TX_Depth.Sample(SS_Linear, Input.vTexcoord).r; + if(depth < 0.00001f) return color; return float4(0,0,0,0); diff --git a/D3D11Engine/Shaders/PS_World.hlsl b/D3D11Engine/Shaders/PS_World.hlsl index d0b8eb62..8311f0d8 100644 --- a/D3D11Engine/Shaders/PS_World.hlsl +++ b/D3D11Engine/Shaders/PS_World.hlsl @@ -85,7 +85,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET DEFERRED_PS_OUTPUT output; output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); - output.vNrm = EncodeNormalGBuffer(normalize(Input.vNormalVS), 1.0f); + output.vNrm = EncodeNormalGBuffer(normalize(Input.vNormalVS)); output.vSI_SP.x = MI_SpecularIntensity; output.vSI_SP.y = MI_SpecularPower; diff --git a/D3D11Engine/Shaders/PS_WorldAtlas.hlsl b/D3D11Engine/Shaders/PS_WorldAtlas.hlsl index 415c4f17..42aed68f 100644 --- a/D3D11Engine/Shaders/PS_WorldAtlas.hlsl +++ b/D3D11Engine/Shaders/PS_WorldAtlas.hlsl @@ -160,7 +160,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET 
output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); - output.vNrm = EncodeNormalGBuffer(nrm, 1.0f); + output.vNrm = EncodeNormalGBuffer(nrm); output.vSI_SP.x = MI_SpecularIntensity * fx.r; output.vSI_SP.y = MI_SpecularPower * fx.g; From 0ed892bcac55cff8362e106c9f27f1b8c3c58f6c Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Thu, 2 Apr 2026 23:18:05 +0200 Subject: [PATCH 23/42] stuff --- D3D11Engine/D3D11AtlasTypes.h | 31 + D3D11Engine/D3D11Engine.vcxproj | 5 + D3D11Engine/D3D11Engine.vcxproj.filters | 15 + D3D11Engine/D3D11GraphicsEngine.cpp | 1132 +------------------- D3D11Engine/D3D11LegacyDeferredShading.cpp | 7 - D3D11Engine/D3D11MeshAtlasPass.cpp | 495 +++++++++ D3D11Engine/D3D11MeshAtlasPass.h | 83 ++ D3D11Engine/D3D11VobAtlasPass.cpp | 608 +++++++++++ D3D11Engine/D3D11VobAtlasPass.h | 81 ++ 9 files changed, 1337 insertions(+), 1120 deletions(-) create mode 100644 D3D11Engine/D3D11AtlasTypes.h create mode 100644 D3D11Engine/D3D11MeshAtlasPass.cpp create mode 100644 D3D11Engine/D3D11MeshAtlasPass.h create mode 100644 D3D11Engine/D3D11VobAtlasPass.cpp create mode 100644 D3D11Engine/D3D11VobAtlasPass.h diff --git a/D3D11Engine/D3D11AtlasTypes.h b/D3D11Engine/D3D11AtlasTypes.h new file mode 100644 index 00000000..05423800 --- /dev/null +++ b/D3D11Engine/D3D11AtlasTypes.h @@ -0,0 +1,31 @@ +#pragma once +#include "D3D11TextureAtlasManager.h" +#include "D3D11IndirectBuffer.h" +#include "ConstantBufferStructs.h" + +#include +#include +#include + +// Shared atlas constants +constexpr size_t TEXTURE_ATLAS_MAX = DXGI_FORMAT_V408 + 1; +struct MeshVisualInfo; + +// Tracks one unique submesh in the global geometry buffer +struct StaticSubmeshEntry { + UINT indexCount; + UINT startIndexLocation; // offset into global IB + int baseVertexLocation; // offset into global VB + TextureDescriptor atlasDesc; + MeshVisualInfo* visual; // which visual owns this submesh +}; + +// Groups all submeshes that share one atlas (same DXGI_FORMAT) 
+struct AtlasDrawGroup { + DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; + std::vector submeshes; + std::vector indirectArgs; + std::unique_ptr indirectBuffer; + UINT mergedArgsOffset = 0; // byte offset into merged indirect args buffer + UINT mergedArgsCount = 0; // number of args in this group +}; diff --git a/D3D11Engine/D3D11Engine.vcxproj b/D3D11Engine/D3D11Engine.vcxproj index e64aa5bc..323fad8b 100644 --- a/D3D11Engine/D3D11Engine.vcxproj +++ b/D3D11Engine/D3D11Engine.vcxproj @@ -801,6 +801,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -819,6 +820,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -855,6 +857,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -1083,6 +1086,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -1116,6 +1120,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + ../pch.h diff --git a/D3D11Engine/D3D11Engine.vcxproj.filters b/D3D11Engine/D3D11Engine.vcxproj.filters index 5e150944..218393df 100644 --- a/D3D11Engine/D3D11Engine.vcxproj.filters +++ b/D3D11Engine/D3D11Engine.vcxproj.filters @@ -855,6 +855,15 @@ + + Engine\D3D11 + + + Engine\D3D11 + + + Engine\D3D11 + @@ -1169,6 +1178,12 @@ Engine + + Engine\D3D11 + + + Engine\D3D11 + diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index f10f8191..c4857428 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -1,6 +1,9 @@ #include "D3D11GraphicsEngine.h" #include "D3D11ShadowMap.h" +#include "D3D11VobAtlasPass.h" +#include "D3D11MeshAtlasPass.h" + #include "AlignedAllocator.h" #include "D3D11Effect.h" #include "D3D11GShader.h" @@ -167,6 +170,9 @@ D3D11GraphicsEngine::D3D11GraphicsEngine() { m_lowlatency = false; m_isWindowActive = false; + m_VobAtlasPass = std::make_unique( this ); + m_MeshAtlasPass = std::make_unique( this ); + // Initialize previous view-proj matrix to identity for motion vectors 
XMStoreFloat4x4( &m_PrevViewProjMatrix, XMMatrixIdentity() ); @@ -3922,7 +3928,7 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh_Indirect( bool noTextures ) { Engine::GAPI->ResetWorldTransform(); // Draw atlas path first (handles opaque + alpha-test submeshes that were atlased) - DrawWorldMesh_Atlas(); + m_MeshAtlasPass->Draw(); struct MDI_DrawArgs { @@ -3981,7 +3987,7 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh_Indirect( bool noTextures ) { for ( auto const& renderItem : renderList ) { for ( auto const& worldMesh : renderItem->WorldMeshes ) { // Skip submeshes already drawn by the atlas path - if ( m_WorldMeshAtlasedSubmeshes.count( worldMesh.second ) ) + if ( m_MeshAtlasPass->IsSubmeshAtlased( worldMesh.second ) ) continue; zCTexture* aniTex = worldMesh.first.Material->GetTexture(); @@ -4194,7 +4200,7 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh( bool noTextures ) { Engine::GAPI->SetViewTransformXM( view ); Engine::GAPI->ResetWorldTransform(); - DrawWorldMesh_Atlas(); + m_MeshAtlasPass->Draw(); SetActivePixelShader( PShaderID::PS_Diffuse ); SetActiveVertexShader( VShaderID::VS_Ex ); @@ -4242,7 +4248,7 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh( bool noTextures ) { for ( auto const& renderItem : renderList ) { for ( auto const& worldMesh : renderItem->WorldMeshes ) { // Skip submeshes already drawn by the atlas path - if ( m_WorldMeshAtlasedSubmeshes.count( worldMesh.second ) ) + if ( m_MeshAtlasPass->IsSubmeshAtlased( worldMesh.second ) ) continue; if ( worldMesh.first.Material ) { @@ -5409,10 +5415,10 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p if ( Engine::GAPI->GetRendererState().RendererSettings.DrawVOBs ) { bool drawStaticVobs = true; - if ( Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows && !m_AtlasDrawGroups.empty() ) { - // GPU indirect path: reuse DrawVOBsIndirect with the cascade/shadow frustum. 
+ if ( Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows && m_VobAtlasPass->IsReady() ) { + // GPU indirect path: reuse the VOB atlas pass with the cascade/shadow frustum. // BC1 groups render depth-only (no PS); BC2 groups use the alpha-test PS. - DrawVOBsIndirect( currentFrustum, /*bindPS=*/false ); + m_VobAtlasPass->Draw( currentFrustum, /*bindPS=*/false ); drawStaticVobs = false; } @@ -5883,7 +5889,7 @@ XRESULT D3D11GraphicsEngine::DrawVOBsInstanced() { auto _ = START_TIMING( "VOBs" ); bool needsDrawVobs = true; - if ( !m_AtlasDrawGroups.empty() ) { + if ( m_VobAtlasPass->IsReady() ) { Frustum cameraFrustum = Frustum::AlwaysContainingFrustum(); if ( auto cam = zCCamera::GetCamera() ) { cam->Activate(); @@ -5891,7 +5897,7 @@ XRESULT D3D11GraphicsEngine::DrawVOBsInstanced() { XMMatrixTranspose( XMLoadFloat4x4( &cam->trafoView ) ), XMLoadFloat4x4( &cam->trafoProjection ) ); } - DrawVOBsIndirect( cameraFrustum ); + m_VobAtlasPass->Draw( cameraFrustum ); needsDrawVobs = false; } @@ -6222,190 +6228,6 @@ XRESULT D3D11GraphicsEngine::DrawVOBsInstanced() { return XR_SUCCESS; } -XRESULT D3D11GraphicsEngine::DrawVOBsIndirect( const Frustum& frustum, bool bindPS ) { - if ( m_AtlasDrawGroups.empty() || !m_VobGPUBuffer || !m_StaticGlobalVertexBuffer || !m_StaticGlobalIndexBuffer ) - return XR_SUCCESS; - - auto _ = RecordGraphicsEvent( L"DrawVOBsIndirect" ); - - auto& context = GetContext(); - - // --- 0b. Build Hi-Z pyramid from current depth buffer (main pass only) --- - // The world mesh has already been rendered, so the depth buffer contains valid - // occluder geometry. Copy depth first to avoid DSV/SRV resource hazard, then - // build the hierarchical min-depth mip chain for GPU culling. - const bool useHiZ = bindPS && m_HiZTexture && m_HiZSRV; - if ( useHiZ ) { - CopyDepthStencil(); - BuildHiZPyramid(); - } - - // --- 1. 
Reset indirect args InstanceCounts via CopyResource from template --- - context->CopyResource( m_MergedIndirectArgs->GetIndirectBuffer().Get(), - m_IndirectArgsTemplate.Get() ); - - // --- 2. Update cull constant buffer --- - extern float vobAnimation_WindStrength; - CullConstants cb = {}; - memcpy( cb.frustumPlanes, frustum.GetPlanes().data(), 6 * sizeof( XMFLOAT4 ) ); - cb.cameraPosition = Engine::GAPI->GetCameraPosition(); - cb.drawDistance = Engine::GAPI->GetRendererState().RendererSettings.OutdoorVobDrawRadius; - cb.globalWindStrength = vobAnimation_WindStrength; - cb.windAdvanced = (Engine::GAPI->GetRendererState().RendererSettings.WindQuality - == GothicRendererSettings::EWindQuality::WIND_QUALITY_ADVANCED) ? 1 : 0; - cb.numVobs = static_cast(m_StaticVobs.size()); - cb.feedbackFrameNumber = 0; - - // Hi-Z occlusion culling: populate view-projection and Hi-Z dimensions - if ( useHiZ ) { - cb.enableHiZ = 1; - cb.hiZMipCount = m_HiZMipCount; - cb.hiZWidth = static_cast( DepthStencilBuffer->GetSizeX() ); - cb.hiZHeight = static_cast( DepthStencilBuffer->GetSizeY() ); - - XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); - auto& projF = Engine::GAPI->GetProjectionMatrix(); - XMStoreFloat4x4( &cb.viewProjection, - XMMatrixMultiply( view, XMLoadFloat4x4( &projF ) ) ); - } else { - cb.enableHiZ = 0; - cb.hiZMipCount = 0; - cb.hiZWidth = 0.0f; - cb.hiZHeight = 0.0f; - XMStoreFloat4x4( &cb.viewProjection, XMMatrixIdentity() ); - } - - m_CullConstantBuffer->UpdateBuffer( &cb ); - m_CullConstantBuffer->BindToComputeShader( 0 ); - - // --- 3. 
Dispatch GPU cull compute shader --- - auto cullCS = ShaderManager->GetCShader( CShaderID::CS_CullVobs ); - if ( !cullCS ) - return XR_SUCCESS; - cullCS->Apply(); - - // SRV t0 = VobGPUData, t1 = SubmeshGPUData - ID3D11ShaderResourceView* srvs[2] = { - m_VobGPUBuffer->GetSRV(), - m_SubmeshGPUBuffer->GetSRV() - }; - context->CSSetShaderResources( 0, 2, srvs ); - - // SRV t2 = Hi-Z pyramid texture (for occlusion culling) - if ( useHiZ ) { - ID3D11ShaderResourceView* hiZSRV = m_HiZSRV.Get(); - context->CSSetShaderResources( 2, 1, &hiZSRV ); - } - - // UAV u0 = InstanceOutput (structured), u1 = IndirectArgs (raw byte address) - ID3D11UnorderedAccessView* uavs[2] = { - m_InstanceBufferGPU->GetUAV(), - m_MergedIndirectArgs->GetUnorderedAccessView().Get() - }; - context->CSSetUnorderedAccessViews( 0, 2, uavs, nullptr ); - - UINT numGroups = (static_cast(m_StaticVobs.size()) + 63) / 64; - context->Dispatch( numGroups, 1, 1 ); - - // Unbind CS resources - ID3D11ShaderResourceView* nullSRV[3] = { nullptr, nullptr, nullptr }; - ID3D11UnorderedAccessView* nullUAV[2] = { nullptr, nullptr }; - context->CSSetShaderResources( 0, 3, nullSRV ); - context->CSSetUnorderedAccessViews( 0, 2, nullUAV, nullptr ); - context->CSSetShader( nullptr, nullptr, 0 ); - - // --- 4. Bind global geometry (once) --- - UINT strides[2] = { sizeof( ExVertexStruct ), sizeof( uint32_t ) }; - UINT offsets[2] = { 0, 0 }; - ID3D11Buffer* vbs[2] = { - m_StaticGlobalVertexBuffer->GetVertexBuffer().Get(), - m_GlobalInstanceIdBuffer->GetVertexBuffer().Get() - }; - context->IASetVertexBuffers( 0, 2, vbs, strides, offsets ); - context->IASetIndexBuffer( m_StaticGlobalIndexBuffer->GetVertexBuffer().Get(), VERTEX_INDEX_DXGI_FORMAT, 0 ); - context->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); - - // --- 5. Bind instance StructuredBuffer (GPU-written) to VS t1 --- - ID3D11ShaderResourceView* instSRV = m_InstanceBufferGPU->GetSRV(); - context->VSSetShaderResources( 1, 1, &instSRV ); - - // --- 6. 
Set shaders --- - SetActiveVertexShader( VShaderID::VS_ExInstancedObjIndirectAtlas ); - - SetupVS_ExMeshDrawCall(); - SetupVS_ExConstantBuffer(); - - // Wind constant buffer (VS still needs this for the wind animation code) - VS_ExConstantBuffer_Wind windBuff{}; - ApplyWindProps( windBuff ); - ActiveVS->GetConstantBuffer()[1]->UpdateBuffer( &windBuff ); - ActiveVS->GetConstantBuffer()[1]->BindToVertexShader( 1 ); - - // Bind reflection cube (only needed for opaque/full-shading pass) - if ( bindPS ) - context->PSSetShaderResources( 4, 1, ReflectionCube.GetAddressOf() ); - - ActiveVS->Apply(); - - // Shared PS constant buffer data (same for both shader variants) - MaterialInfo defMaterial{}; - GSky* sky = Engine::GAPI->GetSky(); - - // --- 7. Draw per atlas group using merged indirect args --- - for ( auto& group : m_AtlasDrawGroups ) { - // Bind this atlas's Texture2DArray SRV to PS slot t0 - ID3D11ShaderResourceView* srv = m_TextureAtlasses[group.format].atlasSRV; - if ( !srv ) - continue; - - // In shadow pass (bindPS=false): BC2/BC3 have alpha and need the alpha-test shader. - // BC1 is fully opaque — depth-only, no PS needed. - const bool needsPS = bindPS || (group.format == DXGI_FORMAT_BC2_UNORM); - - if ( needsPS ) { - context->PSSetShaderResources( 0, 1, &srv ); - - // Full shading: select by format. Shadow alpha-test: always alpha-test PS. 
- if ( bindPS && group.format != DXGI_FORMAT_BC2_UNORM ) - SetActivePixelShader( PShaderID::PS_DiffuseAtlas ); - else - SetActivePixelShader( PShaderID::PS_DiffuseAtlasAlphaTest ); - - ActivePS->GetConstantBuffer()[0]->UpdateBuffer( - &Engine::GAPI->GetRendererState().GraphicsState ); - ActivePS->GetConstantBuffer()[0]->BindToPixelShader( 0 ); - - ActivePS->GetConstantBuffer()[1]->UpdateBuffer( &sky->GetAtmosphereCB() ); - ActivePS->GetConstantBuffer()[1]->BindToPixelShader( 1 ); - - ActivePS->GetConstantBuffer()[2]->UpdateBuffer( &defMaterial.buffer ); - ActivePS->GetConstantBuffer()[2]->BindToPixelShader( 2 ); - - OutdoorVobsConstantBuffer->BindToPixelShader( 3 ); - - ActivePS->Apply(); - } else { - // Depth-only opaque: unbind pixel shader - context->PSSetShader( nullptr, nullptr, 0 ); - } - - // DrawMultiIndexedInstancedIndirect falls back to individual DrawIndexedInstancedIndirect - // calls via Stub_DrawMultiIndexedInstancedIndirect if hardware doesn't support MDI - DrawMultiIndexedInstancedIndirect( - context.Get(), - group.mergedArgsCount, - m_MergedIndirectArgs->GetIndirectBuffer().Get(), - group.mergedArgsOffset, - sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ) ); - } - - // Unbind instance buffer - ID3D11ShaderResourceView* nullVSSRV = nullptr; - context->VSSetShaderResources( 1, 1, &nullVSSRV ); - - return XR_SUCCESS; -} - /** Draws the static VOBs */ XRESULT D3D11GraphicsEngine::DrawFrameAlphaMeshes() { @@ -8058,340 +7880,6 @@ void D3D11GraphicsEngine::StorePrevViewProjMatrix() { } } -void D3D11GraphicsEngine::BuildStaticGeometryBuffers() { - std::vector allVertices; - std::vector allIndices; - - // Temporary: group submeshes by atlas format - std::map groupsByFormat; - - // Track which MeshInfo* we've already added (same visual used by many vobs shares geometry) - std::unordered_set processedMeshes; - - // Pre-count vertices/indices to reserve and avoid incremental reallocation - { - size_t totalVertices = 0, totalIndices = 0; - 
std::unordered_set counted; - for ( auto const& [proto, visual] : Engine::GAPI->GetStaticMeshVisuals() ) { - for ( auto const& [meshKey, meshList] : visual->MeshesByTexture ) { - if ( m_TextureAtlasLookup.find( meshKey.Texture ) == m_TextureAtlasLookup.end() ) - continue; - for ( MeshInfo* mi : meshList ) { - if ( counted.insert( mi ).second ) { - totalVertices += mi->Vertices.size(); - totalIndices += mi->Indices.size(); - } - } - } - } - allVertices.reserve( totalVertices ); - allIndices.reserve( totalIndices ); - } - - for ( auto const& [proto, visual] : Engine::GAPI->GetStaticMeshVisuals() ) { - for ( auto const& [meshKey, meshList] : visual->MeshesByTexture ) { - // Look up atlas descriptor for this texture - auto it = m_TextureAtlasLookup.find( meshKey.Texture ); - if ( it == m_TextureAtlasLookup.end() ) - continue; // texture not in any atlas - - const TextureAtlasLookup& lookup = it->second; - auto& group = groupsByFormat[lookup.atlasFormat]; - group.format = lookup.atlasFormat; - - for ( MeshInfo* mi : meshList ) { - if ( !processedMeshes.insert( mi ).second ) - continue; // already in global buffer - - UINT baseVertex = static_cast(allVertices.size()); - UINT startIndex = static_cast(allIndices.size()); - - allVertices.insert( allVertices.end(), mi->Vertices.begin(), mi->Vertices.end() ); - allIndices.insert( allIndices.end(), mi->Indices.begin(), mi->Indices.end() ); - - StaticSubmeshEntry entry; - entry.indexCount = static_cast(mi->Indices.size()); - entry.startIndexLocation = startIndex; - entry.baseVertexLocation = static_cast(baseVertex); - entry.atlasDesc = lookup.descriptor; - entry.visual = visual; - - group.submeshes.push_back( entry ); - - // Pre-build indirect args (InstanceCount filled per-frame) - D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS args = {}; - args.IndexCountPerInstance = entry.indexCount; - args.InstanceCount = 0; - args.StartIndexLocation = entry.startIndexLocation; - args.BaseVertexLocation = entry.baseVertexLocation; - 
args.StartInstanceLocation = 0; - group.indirectArgs.push_back( args ); - } - } - } - - if ( allVertices.empty() ) { - LogWarn() << "BuildStaticGeometryBuffers: No vertices to process"; - return; - } - - // Create global vertex buffer (IMMUTABLE) - m_StaticGlobalVertexBuffer = std::make_unique(); - m_StaticGlobalVertexBuffer->Init( - allVertices.data(), - static_cast(allVertices.size() * sizeof( ExVertexStruct )), - D3D11VertexBuffer::B_VERTEXBUFFER, - D3D11VertexBuffer::U_IMMUTABLE, - D3D11VertexBuffer::CA_NONE ); - - // Create global index buffer (IMMUTABLE) - m_StaticGlobalIndexBuffer = std::make_unique(); - m_StaticGlobalIndexBuffer->Init( - allIndices.data(), - static_cast(allIndices.size() * sizeof( VERTEX_INDEX )), - D3D11VertexBuffer::B_INDEXBUFFER, - D3D11VertexBuffer::U_IMMUTABLE, - D3D11VertexBuffer::CA_NONE ); - - // Create instance ID buffer: {0, 1, 2, ..., N} - // A conservative upper bound for max instances (vobs * avg submeshes) - UINT maxInstanceIds = static_cast(m_StaticVobs.size() * 4); - if ( maxInstanceIds < 4096 ) maxInstanceIds = 4096; - std::vector instanceIds( maxInstanceIds ); - for ( uint32_t i = 0; i < maxInstanceIds; i++ ) - instanceIds[i] = i; - - m_GlobalInstanceIdBuffer = std::make_unique(); - m_GlobalInstanceIdBuffer->Init( - instanceIds.data(), - static_cast(instanceIds.size() * sizeof( uint32_t )), - D3D11VertexBuffer::B_VERTEXBUFFER, - D3D11VertexBuffer::U_IMMUTABLE, - D3D11VertexBuffer::CA_NONE ); - - // Move groups into final vector and create indirect buffers - m_AtlasDrawGroups.clear(); - for ( auto& [fmt, group] : groupsByFormat ) { - if ( group.indirectArgs.empty() ) - continue; - - UINT bufSize = static_cast(group.indirectArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); - group.indirectBuffer = std::make_unique(); - group.indirectBuffer->Init( - group.indirectArgs.data(), bufSize, - D3D11IndirectBuffer::B_VERTEXBUFFER, - D3D11IndirectBuffer::U_DYNAMIC, - D3D11IndirectBuffer::CA_WRITE ); - - 
m_AtlasDrawGroups.push_back( std::move( group ) ); - } - - LogInfo() << "Atlas geometry: " << allVertices.size() << " vertices, " - << allIndices.size() << " indices, " - << m_AtlasDrawGroups.size() << " atlas groups, " - << processedMeshes.size() << " unique submeshes"; -} - -void D3D11GraphicsEngine::BuildGPUCullingBuffers() { - if ( m_AtlasDrawGroups.empty() || m_StaticVobs.empty() ) - return; - - // --- 1. Build visual -> vob count mapping --- - std::unordered_map vobsPerVisual; - std::unordered_map> vobIndicesByVisual; - - for ( size_t i = 0; i < m_StaticVobs.size(); i++ ) { - auto* visual = reinterpret_cast(m_StaticVobs[i]->VisualInfo); - vobsPerVisual[visual]++; - vobIndicesByVisual[visual].push_back( i ); - } - - // --- 2. Build merged indirect args + SubmeshGPUData --- - // Pass 1: Build merged indirect args (flat, in per-group order) and collect - // per-visual submesh entries. We must ensure SubmeshGPUData entries for the - // same visual are contiguous in the final array, but they may come from - // different atlas groups (e.g., BC1 + BC2 textures on the same mesh). - std::vector mergedArgs; - std::unordered_map> visualSubmeshMap; - - // Build reverse-lookup: for each format, map (slice, x, y) -> source index - // so we can populate SubmeshGPUData.globalSourceIndex for GPU feedback. 
- struct SliceXYKey { - UINT slice, x, y; - bool operator==( const SliceXYKey& o ) const { return slice == o.slice && x == o.x && y == o.y; } - }; - struct SliceXYHash { - size_t operator()( const SliceXYKey& k ) const { - return std::hash{}( k.slice ) ^ ( std::hash{}( k.x ) << 11 ) ^ ( std::hash{}( k.y ) << 22 ); - } - }; - std::unordered_map> sourceIndexLookup; - std::unordered_map formatGlobalOffsets; // cached offsets for global index computation - - UINT runningInstanceOffset = 0; - UINT globalArgIndex = 0; - - for ( auto& group : m_AtlasDrawGroups ) { - group.mergedArgsOffset = static_cast(mergedArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); - group.mergedArgsCount = static_cast(group.indirectArgs.size()); - - for ( size_t si = 0; si < group.submeshes.size(); si++ ) { - const auto& submesh = group.submeshes[si]; - MeshVisualInfo* visual = submesh.visual; - UINT maxInstances = vobsPerVisual.count( visual ) ? vobsPerVisual[visual] : 0; - - // Build the indirect arg with static fields; InstanceCount will be set by CS - D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS args = {}; - args.IndexCountPerInstance = submesh.indexCount; - args.InstanceCount = 0; - args.StartIndexLocation = submesh.startIndexLocation; - args.BaseVertexLocation = submesh.baseVertexLocation; - args.StartInstanceLocation = runningInstanceOffset; - mergedArgs.push_back( args ); - - // Collect per-visual submesh GPU data (written contiguously in pass 2) - SubmeshGPUData smGPU = {}; - smGPU.slice = submesh.atlasDesc.slice; - smGPU.uStart = submesh.atlasDesc.uStart; - smGPU.vStart = submesh.atlasDesc.vStart; - smGPU.uEnd = submesh.atlasDesc.uEnd; - smGPU.vEnd = submesh.atlasDesc.vEnd; - smGPU.argIndex = globalArgIndex; - smGPU.instanceBaseOffset = runningInstanceOffset; - - // Populate globalSourceIndex for GPU feedback by reverse-looking up pixel coords - smGPU.globalSourceIndex = 0; - - visualSubmeshMap[visual].push_back( smGPU ); - - runningInstanceOffset += maxInstances; - 
globalArgIndex++; - } - } - - m_TotalMaxInstances = runningInstanceOffset; - - // Pass 2: Flatten per-visual submesh entries into a contiguous SubmeshGPUData array. - // This guarantees VobGPUData.submeshStart/submeshCount indexes a contiguous range. - struct VisualSubmeshRange { - UINT start; - UINT count; - }; - std::unordered_map visualSubmeshRanges; - std::vector submeshGPU; - - for ( auto& [visual, entries] : visualSubmeshMap ) { - UINT start = static_cast(submeshGPU.size()); - for ( auto& entry : entries ) - submeshGPU.push_back( entry ); - visualSubmeshRanges[visual] = { start, static_cast(entries.size()) }; - } - - // --- 3. Build VobGPUData --- - std::vector vobGPU; - vobGPU.reserve( m_StaticVobs.size() ); - - for ( size_t i = 0; i < m_StaticVobs.size(); i++ ) { - VobInfo* v = m_StaticVobs[i]; - auto* visual = reinterpret_cast(v->VisualInfo); - - VobGPUData data = {}; - - // AABB from vob's bounding box - DirectX::BoundingBox bb = Frustum::BBoxFromzTBBox3D( v->Vob->GetBBox() ); - data.aabbCenter = bb.Center; - data.aabbExtent = bb.Extents; - - data.world = v->WorldMatrix; - data.prevWorld = v->WorldMatrix; // for static vobs, prev == current - data.color = v->GroundColor; - - // Bake animation properties - zTAnimationMode aniMode = v->Vob->GetVisualAniMode(); - if ( aniMode != zVISUAL_ANIMODE_NONE ) { - data.aniModeStrength = v->Vob->GetVisualAniModeStrength(); - data.canBeAffectedByPlayer = (!v->Vob->GetDynColl() ? 1.0f : 0.0f); - } else { - data.aniModeStrength = 0.0f; - data.canBeAffectedByPlayer = 0.0f; - } - - // Look up submesh range for this visual - auto it = visualSubmeshRanges.find( visual ); - if ( it != visualSubmeshRanges.end() ) { - data.submeshStart = it->second.start; - data.submeshCount = it->second.count; - } - - vobGPU.push_back( data ); - } - - // --- 4. 
Upload to GPU --- - auto* device = GetDevice().Get(); - auto* context = GetContext().Get(); - - // VobGPUData buffer (SRV only, DEFAULT usage) - m_VobGPUBuffer = std::make_unique>(); - m_VobGPUBuffer->Init( device, static_cast(vobGPU.size()), false, false ); - m_VobGPUBuffer->UpdateBufferDefault( context, vobGPU.data(), static_cast(vobGPU.size()) ); - - // SubmeshGPUData buffer (SRV only, DEFAULT usage) - m_SubmeshGPUBuffer = std::make_unique>(); - m_SubmeshGPUBuffer->Init( device, static_cast(submeshGPU.size()), false, false ); - m_SubmeshGPUBuffer->UpdateBufferDefault( context, submeshGPU.data(), static_cast(submeshGPU.size()) ); - - // Instance buffer (UAV for CS writes, SRV for VS reads) - UINT instanceCapacity = std::max( m_TotalMaxInstances, 1u ); - m_InstanceBufferGPU = std::make_unique>(); - m_InstanceBufferGPU->Init( device, instanceCapacity, false, true ); - - // Merged indirect args buffer (UAV for CS atomics + indirect draw) - UINT argsSize = static_cast(mergedArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); - m_MergedIndirectArgs = std::make_unique(); - m_MergedIndirectArgs->Init( - mergedArgs.data(), argsSize, - D3D11IndirectBuffer::B_UNORDERED_ACCESS, - D3D11IndirectBuffer::U_DEFAULT, - D3D11IndirectBuffer::CA_NONE ); - - // Template buffer for per-frame reset (stores args with InstanceCount=0) - m_MergedArgsReset = mergedArgs; // already has InstanceCount=0 - - D3D11_BUFFER_DESC templateDesc = {}; - templateDesc.ByteWidth = argsSize; - templateDesc.Usage = D3D11_USAGE_DEFAULT; - templateDesc.BindFlags = 0; - templateDesc.MiscFlags = D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS; - - D3D11_SUBRESOURCE_DATA templateData = {}; - templateData.pSysMem = mergedArgs.data(); - device->CreateBuffer( &templateDesc, &templateData, m_IndirectArgsTemplate.ReleaseAndGetAddressOf() ); - - // Cull constant buffer - CullConstants initCB = {}; - m_CullConstantBuffer = std::make_unique( sizeof( CullConstants ), &initCB ); - - // Update the instance ID 
buffer to match the new total capacity - if ( m_TotalMaxInstances > 0 ) { - std::vector instanceIds( m_TotalMaxInstances ); - for ( uint32_t i = 0; i < m_TotalMaxInstances; i++ ) - instanceIds[i] = i; - - m_GlobalInstanceIdBuffer = std::make_unique(); - m_GlobalInstanceIdBuffer->Init( - instanceIds.data(), - static_cast(instanceIds.size() * sizeof( uint32_t )), - D3D11VertexBuffer::B_VERTEXBUFFER, - D3D11VertexBuffer::U_IMMUTABLE, - D3D11VertexBuffer::CA_NONE ); - } - - LogInfo() << "GPU culling: " << vobGPU.size() << " vobs, " - << submeshGPU.size() << " submesh entries, " - << mergedArgs.size() << " indirect args, " - << m_TotalMaxInstances << " max instances"; -} - void D3D11GraphicsEngine::CreateHiZResources() { auto* device = GetDevice().Get(); HRESULT hr; @@ -8576,588 +8064,6 @@ void D3D11GraphicsEngine::BuildHiZPyramid() { context->CSSetShader( nullptr, nullptr, 0 ); } -void D3D11GraphicsEngine::BuildSceneTextureAtlasses() { - for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ ) { - m_TextureAtlasses[(DXGI_FORMAT)i].Destroy(); - } - m_TextureAtlasLookup.clear(); - m_AtlasDrawGroups.clear(); - - if ( !SupportTextureAtlases || !Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs ) { - return; - } - - struct TextureInfo { - zCTexture* gothicTexture; - DXGI_FORMAT Format; - Microsoft::WRL::ComPtr Texture2D; - }; - std::unordered_set seenTextures; - std::vector uniqueTextures; - - for ( auto vobInfo : m_StaticVobs ) { - for ( auto& byTex : reinterpret_cast(vobInfo->VisualInfo)->MeshesByTexture ) { - zCTexture* tex = byTex.first.Texture; - if ( !tex ) { - tex = byTex.first.Material->GetTexture(); - } - - if ( !tex ) { - tex = byTex.first.Material->GetAniTexture(); - } - - if ( !tex || !seenTextures.insert( tex ).second ) { - LogError() << "Texture not found for visual " << vobInfo->VisualInfo->VisualName; - continue; // skip nulls and duplicates - } - - auto cachedState = tex->CacheIn( -1 ); - if ( cachedState != 
zRES_CACHED_IN ) { - LogError() << "Texture " << tex->GetName() << " was not cached in"; - continue; - } - - auto surface = tex->GetSurface(); - if ( !surface || !surface->IsSurfaceReady() ) { - LogError() << "Texture " << tex->GetName() << " surface not ready"; - continue; - } - - auto engineTex = surface->GetEngineTexture(); - if ( !engineTex ) { - LogError() << "Texture " << tex->GetName() << " no engine texture"; - continue; - } - - D3D11_TEXTURE2D_DESC desc; - engineTex->GetTextureObject()->GetDesc( &desc ); - if ( desc.Format < 1 || desc.Format >= TEXTURE_ATLAS_MAX ) { - LogError() << "Texture " << tex->GetName() << " has unsupported format for atlas: " << desc.Format; - continue; - } - uniqueTextures.push_back( { tex, desc.Format, engineTex->GetTextureObject() } ); - } - } - - // Sort by format so textures with the same format are contiguous - std::sort( uniqueTextures.begin(), uniqueTextures.end(), []( const TextureInfo& a, const TextureInfo& b ) { - return a.Format < b.Format; - } ); - - // Create atlases per format group (process ALL groups including last) - size_t rangeStart = 0; - while ( rangeStart < uniqueTextures.size() ) { - DXGI_FORMAT fmt = uniqueTextures[rangeStart].Format; - size_t rangeEnd = rangeStart; - while ( rangeEnd < uniqueTextures.size() && uniqueTextures[rangeEnd].Format == fmt ) - rangeEnd++; - - std::vector texPtrs; - texPtrs.reserve( rangeEnd - rangeStart ); - for ( size_t i = rangeStart; i < rangeEnd; i++ ) - texPtrs.push_back( uniqueTextures[i].Texture2D.Get() ); - - std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); - - TextureManager::AtlasResult atlas = TextureManager::CreateAtlasArray( GetDevice().Get(), GetContext().Get(), txView, 2048, 6 ); - - // Map descriptors back to Gothic texture pointers - for ( size_t i = 0; i < texPtrs.size(); i++ ) { - size_t srcIdx = rangeStart + i; - m_TextureAtlasLookup[uniqueTextures[srcIdx].gothicTexture] = { - fmt, atlas.descriptors[i] - }; - } - - m_TextureAtlasses[fmt] = 
atlas; - rangeStart = rangeEnd; - } - - LogInfo() << "Atlas: " << uniqueTextures.size() << " unique textures, " << m_TextureAtlasLookup.size() << " mapped"; - - // Build global VB/IB and indirect args from atlas data - BuildStaticGeometryBuffers(); - - // Build GPU structured buffers for compute shader culling - // currently only used with static vobs when we do atlases. - BuildGPUCullingBuffers(); -} - -void D3D11GraphicsEngine::BuildWorldMeshTextureAtlasses() { - for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ ) { - m_WorldMeshDiffuseAtlasses[(DXGI_FORMAT)i].Destroy(); - m_WorldMeshNormalAtlasses[(DXGI_FORMAT)i].Destroy(); - m_WorldMeshFxAtlasses[(DXGI_FORMAT)i].Destroy(); - } - m_WorldMeshDiffuseAtlasLookup.clear(); - m_WorldMeshNormalAtlasLookup.clear(); - m_WorldMeshFxAtlasLookup.clear(); - m_WorldMeshAtlasDrawGroups.clear(); - m_WorldMeshAtlasedSubmeshes.clear(); - m_WorldMeshGlobalVertexBuffer.reset(); - m_WorldMeshGlobalIndexBuffer.reset(); - m_WorldMeshGlobalInstanceIdBuffer.reset(); - m_WorldMeshSubmeshBuffer.reset(); - - if ( !SupportTextureAtlases || !Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh ) { - return; - } - - // --- 1. 
Collect unique diffuse textures from world mesh --- - struct DiffuseTextureInfo { - zCTexture* gothicTexture; - DXGI_FORMAT Format; - Microsoft::WRL::ComPtr Texture2D; - }; - std::unordered_set seenDiffuse; - std::vector uniqueDiffuse; - - struct AuxTextureInfo { - D3D11Texture* engineTexture; - DXGI_FORMAT Format; - Microsoft::WRL::ComPtr Texture2D; - }; - std::unordered_set seenNormal, seenFx; - std::vector uniqueNormals, uniqueFx; - - auto& worldSections = Engine::GAPI->GetWorldSections(); - for ( auto& [x, row] : worldSections ) { - for ( auto& [y, section] : row ) { - for ( auto const& [meshKey, worldMeshInfo] : section.WorldMeshes ) { - if ( !meshKey.Material ) continue; - - // Skip animated textures - zCTexture* baseTex = meshKey.Material->GetTextureSingle(); - if ( !baseTex ) continue; - unsigned char texFlags = *reinterpret_cast( - reinterpret_cast(baseTex) + GothicMemoryLocations::zCTexture::Offset_Flags ); - if ( texFlags & GothicMemoryLocations::zCTexture::Mask_FlagIsAnimated ) - continue; - - // Skip alpha-blended (only opaque + alpha-test) - int alphaFunc = meshKey.Material->GetAlphaFunc(); - if ( alphaFunc > zMAT_ALPHA_FUNC_NONE && alphaFunc != zMAT_ALPHA_FUNC_TEST ) - continue; - - // Skip non-standard materials (water, portals, etc.) 
- if ( meshKey.Info && meshKey.Info->MaterialType != MaterialInfo::MT_None ) - continue; - - zCTexture* tex = baseTex; - auto cachedState = tex->CacheIn( -1 ); - if ( cachedState != zRES_CACHED_IN ) continue; - - auto surface = tex->GetSurface(); - if ( !surface || !surface->IsSurfaceReady() ) continue; - - auto engineTex = surface->GetEngineTexture(); - if ( !engineTex ) continue; - - // Diffuse - if ( seenDiffuse.insert( tex ).second ) { - D3D11_TEXTURE2D_DESC desc; - engineTex->GetTextureObject()->GetDesc( &desc ); - if ( desc.Format >= 1 && desc.Format < TEXTURE_ATLAS_MAX ) { - uniqueDiffuse.push_back( { tex, desc.Format, engineTex->GetTextureObject() } ); - } - } - - // Normal map - D3D11Texture* normalTex = surface->GetNormalmap(); - if ( normalTex && seenNormal.insert( normalTex ).second ) { - D3D11_TEXTURE2D_DESC desc; - normalTex->GetTextureObject()->GetDesc( &desc ); - if ( desc.Format >= 1 && desc.Format < TEXTURE_ATLAS_MAX ) { - uniqueNormals.push_back( { normalTex, desc.Format, normalTex->GetTextureObject() } ); - } - } - - // FX map - D3D11Texture* fxTex = surface->GetFxMap(); - if ( fxTex && seenFx.insert( fxTex ).second ) { - D3D11_TEXTURE2D_DESC desc; - fxTex->GetTextureObject()->GetDesc( &desc ); - if ( desc.Format >= 1 && desc.Format < TEXTURE_ATLAS_MAX ) { - uniqueFx.push_back( { fxTex, desc.Format, fxTex->GetTextureObject() } ); - } - } - } - } - } - - // --- 2. 
Build atlases per format group for each texture type --- - auto buildDiffuseAtlases = [&]() { - std::sort( uniqueDiffuse.begin(), uniqueDiffuse.end(), - []( const DiffuseTextureInfo& a, const DiffuseTextureInfo& b ) { return a.Format < b.Format; } ); - - size_t rangeStart = 0; - while ( rangeStart < uniqueDiffuse.size() ) { - DXGI_FORMAT fmt = uniqueDiffuse[rangeStart].Format; - size_t rangeEnd = rangeStart; - while ( rangeEnd < uniqueDiffuse.size() && uniqueDiffuse[rangeEnd].Format == fmt ) - rangeEnd++; - - std::vector texPtrs; - texPtrs.reserve( rangeEnd - rangeStart ); - for ( size_t i = rangeStart; i < rangeEnd; i++ ) - texPtrs.push_back( uniqueDiffuse[i].Texture2D.Get() ); - - std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); - TextureManager::AtlasResult atlas = TextureManager::CreateAtlasArray( - GetDevice().Get(), GetContext().Get(), txView, 2048, 6 ); - - for ( size_t i = 0; i < texPtrs.size(); i++ ) { - m_WorldMeshDiffuseAtlasLookup[uniqueDiffuse[rangeStart + i].gothicTexture] = { - fmt, atlas.descriptors[i] - }; - } - m_WorldMeshDiffuseAtlasses[fmt] = atlas; - rangeStart = rangeEnd; - } - }; - - auto buildAuxAtlases = []( std::vector& textures, - std::unordered_map& lookup, - std::array& atlasses, - ID3D11Device* device, ID3D11DeviceContext* context ) { - std::sort( textures.begin(), textures.end(), - []( const AuxTextureInfo& a, const AuxTextureInfo& b ) { return a.Format < b.Format; } ); - - size_t rangeStart = 0; - while ( rangeStart < textures.size() ) { - DXGI_FORMAT fmt = textures[rangeStart].Format; - size_t rangeEnd = rangeStart; - while ( rangeEnd < textures.size() && textures[rangeEnd].Format == fmt ) - rangeEnd++; - - std::vector texPtrs; - texPtrs.reserve( rangeEnd - rangeStart ); - for ( size_t i = rangeStart; i < rangeEnd; i++ ) - texPtrs.push_back( textures[i].Texture2D.Get() ); - - std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); - TextureManager::AtlasResult atlas = TextureManager::CreateAtlasArray( - 
device, context, txView, 2048, 6 ); - - for ( size_t i = 0; i < texPtrs.size(); i++ ) { - lookup[textures[rangeStart + i].engineTexture] = { - fmt, atlas.descriptors[i] - }; - } - atlasses[fmt] = atlas; - rangeStart = rangeEnd; - } - }; - - buildDiffuseAtlases(); - buildAuxAtlases( uniqueNormals, m_WorldMeshNormalAtlasLookup, m_WorldMeshNormalAtlasses, - GetDevice().Get(), GetContext().Get() ); - buildAuxAtlases( uniqueFx, m_WorldMeshFxAtlasLookup, m_WorldMeshFxAtlasses, - GetDevice().Get(), GetContext().Get() ); - - LogInfo() << "World Mesh Atlas: " << uniqueDiffuse.size() << " diffuse, " - << uniqueNormals.size() << " normal, " << uniqueFx.size() << " fx textures"; - - BuildStaticWorldMeshBuffers(); -} - -void D3D11GraphicsEngine::BuildStaticWorldMeshBuffers() { - std::vector allVertices; - std::vector allIndices; - std::vector submeshGPU; - - // Group by diffuse atlas format (normal/fx may differ but we key on diffuse) - std::map groupsByFormat; - - std::unordered_set processedMeshes; - - // Pre-count total vertices/indices to avoid incremental reallocation - { - size_t totalVertices = 0, totalIndices = 0, totalSubmeshes = 0; - auto& ws = Engine::GAPI->GetWorldSections(); - for ( auto& [x, row] : ws ) { - for ( auto& [y, section] : row ) { - for ( auto const& [meshKey, worldMeshInfo] : section.WorldMeshes ) { - if ( !meshKey.Material ) continue; - zCTexture* tex = meshKey.Material->GetTextureSingle(); - if ( m_WorldMeshDiffuseAtlasLookup.find( tex ) != m_WorldMeshDiffuseAtlasLookup.end() ) { - totalVertices += worldMeshInfo->Vertices.size(); - totalIndices += worldMeshInfo->Indices.size(); - totalSubmeshes++; - } - } - } - } - allVertices.reserve( totalVertices ); - allIndices.reserve( totalIndices ); - submeshGPU.reserve( totalSubmeshes ); - } - - auto& worldSections = Engine::GAPI->GetWorldSections(); - for ( auto& [x, row] : worldSections ) { - for ( auto& [y, section] : row ) { - for ( auto const& [meshKey, worldMeshInfo] : section.WorldMeshes ) { - if ( 
!meshKey.Material ) continue; - - zCTexture* tex = meshKey.Material->GetTextureSingle(); - auto diffIt = m_WorldMeshDiffuseAtlasLookup.find( tex ); - if ( diffIt == m_WorldMeshDiffuseAtlasLookup.end() ) - continue; // not in atlas - - MeshInfo* mi = worldMeshInfo; - if ( !processedMeshes.insert( mi ).second ) - continue; // already added - - m_WorldMeshAtlasedSubmeshes.insert( mi ); - - const TextureAtlasLookup& diffLookup = diffIt->second; - auto& group = groupsByFormat[diffLookup.atlasFormat]; - group.format = diffLookup.atlasFormat; - - UINT baseVertex = static_cast(allVertices.size()); - UINT startIndex = static_cast(allIndices.size()); - - allVertices.insert( allVertices.end(), mi->Vertices.begin(), mi->Vertices.end() ); - allIndices.insert( allIndices.end(), mi->Indices.begin(), mi->Indices.end() ); - - // Build GPU descriptor for this submesh - WorldMeshSubmeshGPUData gpuData = {}; - gpuData.diffuseSlice = diffLookup.descriptor.slice; - gpuData.dUStart = diffLookup.descriptor.uStart; - gpuData.dVStart = diffLookup.descriptor.vStart; - gpuData.dUEnd = diffLookup.descriptor.uEnd; - gpuData.dVEnd = diffLookup.descriptor.vEnd; - - UINT flags = 0; - - // Normal map lookup - auto surface = tex->GetSurface(); - if ( surface ) { - D3D11Texture* normalTex = surface->GetNormalmap(); - if ( normalTex ) { - auto normIt = m_WorldMeshNormalAtlasLookup.find( normalTex ); - if ( normIt != m_WorldMeshNormalAtlasLookup.end() ) { - gpuData.normalSlice = normIt->second.descriptor.slice; - gpuData.nUStart = normIt->second.descriptor.uStart; - gpuData.nVStart = normIt->second.descriptor.vStart; - gpuData.nUEnd = normIt->second.descriptor.uEnd; - gpuData.nVEnd = normIt->second.descriptor.vEnd; - flags |= 1; // HAS_NORMAL - } - } - - D3D11Texture* fxTex = surface->GetFxMap(); - if ( fxTex ) { - auto fxIt = m_WorldMeshFxAtlasLookup.find( fxTex ); - if ( fxIt != m_WorldMeshFxAtlasLookup.end() ) { - gpuData.fxSlice = fxIt->second.descriptor.slice; - gpuData.fUStart = 
fxIt->second.descriptor.uStart; - gpuData.fVStart = fxIt->second.descriptor.vStart; - gpuData.fUEnd = fxIt->second.descriptor.uEnd; - gpuData.fVEnd = fxIt->second.descriptor.vEnd; - flags |= 2; // HAS_FX - } - } - } - - // Alpha test flag - int alphaFunc = meshKey.Material->GetAlphaFunc(); - if ( alphaFunc == zMAT_ALPHA_FUNC_TEST || tex->HasAlphaChannel() ) { - flags |= 4; // ALPHA_TEST - } - - gpuData.flags = flags; - - UINT submeshIndex = static_cast(submeshGPU.size()); - submeshGPU.push_back( gpuData ); - - // Indirect draw arg — InstanceCount=1 (world mesh is not instanced), - // StartInstanceLocation = submeshIndex (used as VS instance remap) - D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS args = {}; - args.IndexCountPerInstance = static_cast(mi->Indices.size()); - args.InstanceCount = 1; - args.StartIndexLocation = startIndex; - args.BaseVertexLocation = static_cast(baseVertex); - args.StartInstanceLocation = submeshIndex; - group.indirectArgs.push_back( args ); - } - } - } - - if ( allVertices.empty() ) { - LogWarn() << "BuildStaticWorldMeshBuffers: No world mesh vertices for atlas"; - return; - } - - // Create global vertex buffer (IMMUTABLE) - m_WorldMeshGlobalVertexBuffer = std::make_unique(); - m_WorldMeshGlobalVertexBuffer->Init( - allVertices.data(), - static_cast(allVertices.size() * sizeof( ExVertexStruct )), - D3D11VertexBuffer::B_VERTEXBUFFER, - D3D11VertexBuffer::U_IMMUTABLE, - D3D11VertexBuffer::CA_NONE ); - - // Create global index buffer (IMMUTABLE) - m_WorldMeshGlobalIndexBuffer = std::make_unique(); - m_WorldMeshGlobalIndexBuffer->Init( - allIndices.data(), - static_cast(allIndices.size() * sizeof( VERTEX_INDEX )), - D3D11VertexBuffer::B_INDEXBUFFER, - D3D11VertexBuffer::U_IMMUTABLE, - D3D11VertexBuffer::CA_NONE ); - - // Instance ID buffer: just {0,1,2,...} so the VS can read submeshIdx - // For world mesh, each submesh draws exactly 1 instance, - // and StartInstanceLocation = submeshIndex in the indirect args - UINT maxIds = 
static_cast(submeshGPU.size()); - if ( maxIds < 256 ) maxIds = 256; - std::vector instanceIds( maxIds ); - for ( uint32_t i = 0; i < maxIds; i++ ) - instanceIds[i] = i; - - m_WorldMeshGlobalInstanceIdBuffer = std::make_unique(); - m_WorldMeshGlobalInstanceIdBuffer->Init( - instanceIds.data(), - static_cast(instanceIds.size() * sizeof( uint32_t )), - D3D11VertexBuffer::B_VERTEXBUFFER, - D3D11VertexBuffer::U_IMMUTABLE, - D3D11VertexBuffer::CA_NONE ); - - // Structured buffer for submesh GPU data - auto* device = GetDevice().Get(); - auto* context = GetContext().Get(); - m_WorldMeshSubmeshBuffer = std::make_unique>(); - m_WorldMeshSubmeshBuffer->Init( device, static_cast(submeshGPU.size()), false, false ); - m_WorldMeshSubmeshBuffer->UpdateBufferDefault( context, submeshGPU.data(), static_cast(submeshGPU.size()) ); - - // Move groups and create indirect buffers - m_WorldMeshAtlasDrawGroups.clear(); - for ( auto& [fmt, group] : groupsByFormat ) { - if ( group.indirectArgs.empty() ) - continue; - - UINT bufSize = static_cast(group.indirectArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); - group.indirectBuffer = std::make_unique(); - group.indirectBuffer->Init( - group.indirectArgs.data(), bufSize, - D3D11IndirectBuffer::B_VERTEXBUFFER, - D3D11IndirectBuffer::U_IMMUTABLE, - D3D11IndirectBuffer::CA_NONE ); - - m_WorldMeshAtlasDrawGroups.push_back( std::move( group ) ); - } - - LogInfo() << "World Mesh Atlas geometry: " << allVertices.size() << " vertices, " - << allIndices.size() << " indices, " - << m_WorldMeshAtlasDrawGroups.size() << " format groups, " - << submeshGPU.size() << " submeshes"; -} - -XRESULT D3D11GraphicsEngine::DrawWorldMesh_Atlas() { - if ( m_WorldMeshAtlasDrawGroups.empty() || !m_WorldMeshGlobalVertexBuffer || !m_WorldMeshGlobalIndexBuffer ) - return XR_SUCCESS; - - auto _ = RecordGraphicsEvent( L"DrawWorldMesh_Atlas" ); - auto& context = GetContext(); - - // Reset render states to opaque defaults (depth write on, no blending, etc.) 
- SetDefaultStates(); - - XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); - Engine::GAPI->SetViewTransformXM( view ); - Engine::GAPI->ResetWorldTransform(); - - context->DSSetShader( nullptr, nullptr, 0 ); - context->HSSetShader( nullptr, nullptr, 0 ); - - // --- 1. Bind global geometry (once) --- - UINT strides[2] = { sizeof( ExVertexStruct ), sizeof( uint32_t ) }; - UINT offsets[2] = { 0, 0 }; - ID3D11Buffer* vbs[2] = { - m_WorldMeshGlobalVertexBuffer->GetVertexBuffer().Get(), - m_WorldMeshGlobalInstanceIdBuffer->GetVertexBuffer().Get() - }; - context->IASetVertexBuffers( 0, 2, vbs, strides, offsets ); - context->IASetIndexBuffer( m_WorldMeshGlobalIndexBuffer->GetVertexBuffer().Get(), VERTEX_INDEX_DXGI_FORMAT, 0 ); - context->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); - - // --- 2. Bind submesh StructuredBuffer to VS t1 --- - ID3D11ShaderResourceView* submeshSRV = m_WorldMeshSubmeshBuffer->GetSRV(); - context->VSSetShaderResources( 1, 1, &submeshSRV ); - - // --- 3. Set vertex shader --- - SetActiveVertexShader( VShaderID::VS_ExWorldAtlas ); - SetupVS_ExMeshDrawCall(); - SetupVS_ExConstantBuffer(); - - // World mesh is already in world space — pass identity world matrix - /* -- Atlas shader doesn't use this - ActiveVS->GetConstantBuffer()[1]->UpdateBuffer( &XMMatrixIdentity() ); - ActiveVS->GetConstantBuffer()[1]->BindToVertexShader( 1 ); - */ - ActiveVS->Apply(); - - // --- 4. 
Set pixel shader + constant buffers --- - SetActivePixelShader( PShaderID::PS_WorldAtlas ); - - ActivePS->GetConstantBuffer()[0]->UpdateBuffer( - &Engine::GAPI->GetRendererState().GraphicsState ); - ActivePS->GetConstantBuffer()[0]->BindToPixelShader( 0 ); - - GSky* sky = Engine::GAPI->GetSky(); - ActivePS->GetConstantBuffer()[1]->UpdateBuffer( &sky->GetAtmosphereCB() ); - ActivePS->GetConstantBuffer()[1]->BindToPixelShader( 1 ); - - MaterialInfo defMaterial{}; - ActivePS->GetConstantBuffer()[2]->UpdateBuffer( &defMaterial.buffer ); - ActivePS->GetConstantBuffer()[2]->BindToPixelShader( 2 ); - - InfiniteRangeConstantBuffer->BindToPixelShader( 3 ); - - // Bind reflection cube - context->PSSetShaderResources( 4, 1, ReflectionCube.GetAddressOf() ); - - ActivePS->Apply(); - - // --- 5. Draw per format group --- - for ( auto& group : m_WorldMeshAtlasDrawGroups ) { - // Bind atlas textures for this format group - // Diffuse atlas -> PS t0 - ID3D11ShaderResourceView* diffuseSRV = m_WorldMeshDiffuseAtlasses[group.format].atlasSRV; - if ( !diffuseSRV ) continue; - - // Find normal/FX atlases — they may be a different format, so bind all available - // We bind the first non-null atlas of each type since format grouping is per-diffuse - ID3D11ShaderResourceView* normalSRV = nullptr; - ID3D11ShaderResourceView* fxSRV = nullptr; - for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ ) { - if ( !normalSRV && m_WorldMeshNormalAtlasses[i].atlasSRV ) - normalSRV = m_WorldMeshNormalAtlasses[i].atlasSRV; - if ( !fxSRV && m_WorldMeshFxAtlasses[i].atlasSRV ) - fxSRV = m_WorldMeshFxAtlasses[i].atlasSRV; - } - - ID3D11ShaderResourceView* psSRVs[3] = { diffuseSRV, normalSRV, fxSRV }; - context->PSSetShaderResources( 0, 3, psSRVs ); - - // DrawMultiIndexedInstancedIndirect falls back to individual DrawIndexedInstancedIndirect - // calls via Stub_DrawMultiIndexedInstancedIndirect if hardware doesn't support MDI - DrawMultiIndexedInstancedIndirect( - context.Get(), - 
static_cast(group.indirectArgs.size()), - group.indirectBuffer->GetIndirectBuffer().Get(), - 0, - sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ) ); - } - - // Unbind - ID3D11ShaderResourceView* nullSRV = nullptr; - context->VSSetShaderResources( 1, 1, &nullSRV ); - - return XR_SUCCESS; -} - void D3D11GraphicsEngine::CacheWorldStaticVobs() { static std::vector _1; @@ -9223,11 +8129,11 @@ void D3D11GraphicsEngine::OnWorldLoaded() { CacheWorldStaticVobs(); - // --- Atlas building: collect unique textures, create Texture2DArray atlases, map descriptors --- - BuildSceneTextureAtlasses(); + // --- Build VOB texture atlases: collect unique textures, create Texture2DArray atlases --- + m_VobAtlasPass->Build(); - // --- World mesh atlas: collect textures, build atlases, merge geometry --- - BuildWorldMeshTextureAtlasses(); + // --- Build world mesh atlas: collect textures, build atlases, merge geometry --- + m_MeshAtlasPass->Build(); } void D3D11GraphicsEngine::StoreVobPreviousTransforms() { diff --git a/D3D11Engine/D3D11LegacyDeferredShading.cpp b/D3D11Engine/D3D11LegacyDeferredShading.cpp index a2dea70e..e274094e 100644 --- a/D3D11Engine/D3D11LegacyDeferredShading.cpp +++ b/D3D11Engine/D3D11LegacyDeferredShading.cpp @@ -37,13 +37,10 @@ XRESULT D3D11LegacyDeferredShading::DrawPointlightLights( if ( settings.LimitLightIntesity ) { Engine::GAPI->GetRendererState().BlendState.BlendOp = GothicBlendStateInfo::BO_BLEND_OP_MAX; } - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_BACK; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); graphicsEngine->SetupVS_ExMeshDrawCall(); graphicsEngine->SetupVS_ExConstantBuffer(); @@ -124,16 +121,12 @@ XRESULT D3D11LegacyDeferredShading::DrawPointlightLights( if ( 
Engine::GAPI->GetRendererState().DepthState.DepthBufferEnabled ) { Engine::GAPI->GetRendererState().DepthState.DepthBufferEnabled = false; Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_FRONT; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); graphicsEngine->UpdateRenderStates(); } } else { if ( !Engine::GAPI->GetRendererState().DepthState.DepthBufferEnabled ) { Engine::GAPI->GetRendererState().DepthState.DepthBufferEnabled = true; Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_BACK; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); graphicsEngine->UpdateRenderStates(); } } diff --git a/D3D11Engine/D3D11MeshAtlasPass.cpp b/D3D11Engine/D3D11MeshAtlasPass.cpp new file mode 100644 index 00000000..cbe88563 --- /dev/null +++ b/D3D11Engine/D3D11MeshAtlasPass.cpp @@ -0,0 +1,495 @@ +#include "D3D11MeshAtlasPass.h" +#include "D3D11GraphicsEngine.h" + +#include "D3D11ShaderManager.h" +#include "D3D11VShader.h" +#include "D3D11PShader.h" +#include "D3D11ConstantBuffer.h" +#include "GothicAPI.h" +#include "GSky.h" +#include "RenderToTextureBuffer.h" +#include "WorldObjects.h" +#include "VertexTypes.h" +#include "zCTexture.h" +#include "zCMaterial.h" + +#include +#include + +// ----- globals defined in D3D11GraphicsEngine.cpp ----- +extern bool SupportTextureAtlases; +namespace { + constexpr DXGI_FORMAT VERTEX_INDEX_DXGI_FORMAT = sizeof( VERTEX_INDEX ) == sizeof( unsigned short ) ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; +} + +typedef void( __cdecl* PFN_DRAWMULTIINDEXEDINSTANCEDINDIRECT )( + ID3D11DeviceContext* context, unsigned int drawCount, + ID3D11Buffer* buffer, unsigned int alignedByteOffsetForArgs, + unsigned int alignedByteStrideForArgs ); +extern PFN_DRAWMULTIINDEXEDINSTANCEDINDIRECT DrawMultiIndexedInstancedIndirect; + +// ------------------------------------------------------- + +D3D11MeshAtlasPass::D3D11MeshAtlasPass( D3D11GraphicsEngine* engine ) + : m_Engine( engine ) { +} + +// ============================================================ +// Build – entry point called from OnWorldLoaded +// ============================================================ +void D3D11MeshAtlasPass::Build() { + // Reset everything + for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ ) { + m_WorldMeshDiffuseAtlasses[(DXGI_FORMAT)i].Destroy(); + m_WorldMeshNormalAtlasses[(DXGI_FORMAT)i].Destroy(); + m_WorldMeshFxAtlasses[(DXGI_FORMAT)i].Destroy(); + } + m_WorldMeshDiffuseAtlasLookup.clear(); + m_WorldMeshNormalAtlasLookup.clear(); + m_WorldMeshFxAtlasLookup.clear(); + m_WorldMeshAtlasDrawGroups.clear(); + m_WorldMeshAtlasedSubmeshes.clear(); + m_WorldMeshGlobalVertexBuffer.reset(); + m_WorldMeshGlobalIndexBuffer.reset(); + m_WorldMeshGlobalInstanceIdBuffer.reset(); + m_WorldMeshSubmeshBuffer.reset(); + + if ( !SupportTextureAtlases || + !Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh ) { + return; + } + + BuildTextureAtlasses(); + + if ( m_WorldMeshDiffuseAtlasLookup.empty() ) + return; + + BuildGeometryBuffers(); +} + +// ============================================================ +// BuildTextureAtlasses +// ============================================================ +void D3D11MeshAtlasPass::BuildTextureAtlasses() { + struct DiffuseTextureInfo { + zCTexture* gothicTexture; + DXGI_FORMAT Format; + Microsoft::WRL::ComPtr Texture2D; + }; + struct AuxTextureInfo { + D3D11Texture* engineTexture; + 
DXGI_FORMAT Format; + Microsoft::WRL::ComPtr Texture2D; + }; + + std::unordered_set seenDiffuse; + std::unordered_set seenNormal, seenFx; + std::vector uniqueDiffuse; + std::vector uniqueNormals, uniqueFx; + + auto& worldSections = Engine::GAPI->GetWorldSections(); + for ( auto& [x, row] : worldSections ) { + for ( auto& [y, section] : row ) { + for ( auto const& [meshKey, worldMeshInfo] : section.WorldMeshes ) { + if ( !meshKey.Material ) continue; + + // Skip animated textures + zCTexture* baseTex = meshKey.Material->GetTextureSingle(); + if ( !baseTex ) continue; + unsigned char texFlags = *reinterpret_cast( + reinterpret_cast(baseTex) + GothicMemoryLocations::zCTexture::Offset_Flags ); + if ( texFlags & GothicMemoryLocations::zCTexture::Mask_FlagIsAnimated ) + continue; + + // Only opaque + alpha-test + int alphaFunc = meshKey.Material->GetAlphaFunc(); + if ( alphaFunc > zMAT_ALPHA_FUNC_NONE && alphaFunc != zMAT_ALPHA_FUNC_TEST ) + continue; + + // Skip non-standard materials (water, portals, etc.) 
+ if ( meshKey.Info && meshKey.Info->MaterialType != MaterialInfo::MT_None ) + continue; + + zCTexture* tex = baseTex; + auto cachedState = tex->CacheIn( -1 ); + if ( cachedState != zRES_CACHED_IN ) continue; + + auto surface = tex->GetSurface(); + if ( !surface || !surface->IsSurfaceReady() ) continue; + + auto engineTex = surface->GetEngineTexture(); + if ( !engineTex ) continue; + + // Diffuse + if ( seenDiffuse.insert( tex ).second ) { + D3D11_TEXTURE2D_DESC desc; + engineTex->GetTextureObject()->GetDesc( &desc ); + if ( desc.Format >= 1 && desc.Format < TEXTURE_ATLAS_MAX ) + uniqueDiffuse.push_back( { tex, desc.Format, engineTex->GetTextureObject() } ); + } + + // Normal map + D3D11Texture* normalTex = surface->GetNormalmap(); + if ( normalTex && seenNormal.insert( normalTex ).second ) { + D3D11_TEXTURE2D_DESC desc; + normalTex->GetTextureObject()->GetDesc( &desc ); + if ( desc.Format >= 1 && desc.Format < TEXTURE_ATLAS_MAX ) + uniqueNormals.push_back( { normalTex, desc.Format, normalTex->GetTextureObject() } ); + } + + // FX map + D3D11Texture* fxTex = surface->GetFxMap(); + if ( fxTex && seenFx.insert( fxTex ).second ) { + D3D11_TEXTURE2D_DESC desc; + fxTex->GetTextureObject()->GetDesc( &desc ); + if ( desc.Format >= 1 && desc.Format < TEXTURE_ATLAS_MAX ) + uniqueFx.push_back( { fxTex, desc.Format, fxTex->GetTextureObject() } ); + } + } + } + } + + auto* device = m_Engine->GetDevice().Get(); + auto* context = m_Engine->GetContext().Get(); + + // Build per-format Texture2DArray atlases for diffuse textures + { + std::sort( uniqueDiffuse.begin(), uniqueDiffuse.end(), + []( const DiffuseTextureInfo& a, const DiffuseTextureInfo& b ) { return a.Format < b.Format; } ); + + size_t rangeStart = 0; + while ( rangeStart < uniqueDiffuse.size() ) { + DXGI_FORMAT fmt = uniqueDiffuse[rangeStart].Format; + size_t rangeEnd = rangeStart; + while ( rangeEnd < uniqueDiffuse.size() && uniqueDiffuse[rangeEnd].Format == fmt ) + rangeEnd++; + + std::vector texPtrs; + 
texPtrs.reserve( rangeEnd - rangeStart ); + for ( size_t i = rangeStart; i < rangeEnd; i++ ) + texPtrs.push_back( uniqueDiffuse[i].Texture2D.Get() ); + + std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); + TextureManager::AtlasResult atlas = TextureManager::CreateAtlasArray( device, context, txView, 2048, 6 ); + + for ( size_t i = 0; i < texPtrs.size(); i++ ) + m_WorldMeshDiffuseAtlasLookup[uniqueDiffuse[rangeStart + i].gothicTexture] = { fmt, atlas.descriptors[i] }; + + m_WorldMeshDiffuseAtlasses[fmt] = atlas; + rangeStart = rangeEnd; + } + } + + // Helper: build aux (normal/fx) atlases + auto buildAuxAtlases = [&]( std::vector& textures, + std::unordered_map& lookup, + std::array& atlasses ) { + std::sort( textures.begin(), textures.end(), + []( const AuxTextureInfo& a, const AuxTextureInfo& b ) { return a.Format < b.Format; } ); + + size_t rangeStart = 0; + while ( rangeStart < textures.size() ) { + DXGI_FORMAT fmt = textures[rangeStart].Format; + size_t rangeEnd = rangeStart; + while ( rangeEnd < textures.size() && textures[rangeEnd].Format == fmt ) + rangeEnd++; + + std::vector texPtrs; + texPtrs.reserve( rangeEnd - rangeStart ); + for ( size_t i = rangeStart; i < rangeEnd; i++ ) + texPtrs.push_back( textures[i].Texture2D.Get() ); + + std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); + TextureManager::AtlasResult atlas = TextureManager::CreateAtlasArray( device, context, txView, 2048, 6 ); + + for ( size_t i = 0; i < texPtrs.size(); i++ ) + lookup[textures[rangeStart + i].engineTexture] = { fmt, atlas.descriptors[i] }; + + atlasses[fmt] = atlas; + rangeStart = rangeEnd; + } + }; + + buildAuxAtlases( uniqueNormals, m_WorldMeshNormalAtlasLookup, m_WorldMeshNormalAtlasses ); + buildAuxAtlases( uniqueFx, m_WorldMeshFxAtlasLookup, m_WorldMeshFxAtlasses ); + + LogInfo() << "World Mesh Atlas: " << uniqueDiffuse.size() << " diffuse, " + << uniqueNormals.size() << " normal, " << uniqueFx.size() << " fx textures"; +} + +// 
============================================================ +// BuildGeometryBuffers +// ============================================================ +void D3D11MeshAtlasPass::BuildGeometryBuffers() { + std::vector allVertices; + std::vector allIndices; + std::vector submeshGPU; + + std::map groupsByFormat; + std::unordered_set processedMeshes; + + // Pre-count + { + size_t totalVertices = 0, totalIndices = 0, totalSubmeshes = 0; + auto& ws = Engine::GAPI->GetWorldSections(); + for ( auto& [x, row] : ws ) { + for ( auto& [y, section] : row ) { + for ( auto const& [meshKey, worldMeshInfo] : section.WorldMeshes ) { + if ( !meshKey.Material ) continue; + zCTexture* tex = meshKey.Material->GetTextureSingle(); + if ( m_WorldMeshDiffuseAtlasLookup.find( tex ) != m_WorldMeshDiffuseAtlasLookup.end() ) { + totalVertices += worldMeshInfo->Vertices.size(); + totalIndices += worldMeshInfo->Indices.size(); + totalSubmeshes++; + } + } + } + } + allVertices.reserve( totalVertices ); + allIndices.reserve( totalIndices ); + submeshGPU.reserve( totalSubmeshes ); + } + + auto& worldSections = Engine::GAPI->GetWorldSections(); + for ( auto& [x, row] : worldSections ) { + for ( auto& [y, section] : row ) { + for ( auto const& [meshKey, worldMeshInfo] : section.WorldMeshes ) { + if ( !meshKey.Material ) continue; + + zCTexture* tex = meshKey.Material->GetTextureSingle(); + auto diffIt = m_WorldMeshDiffuseAtlasLookup.find( tex ); + if ( diffIt == m_WorldMeshDiffuseAtlasLookup.end() ) + continue; + + MeshInfo* mi = worldMeshInfo; + if ( !processedMeshes.insert( mi ).second ) + continue; + + m_WorldMeshAtlasedSubmeshes.insert( mi ); + + const TextureAtlasLookup& diffLookup = diffIt->second; + auto& group = groupsByFormat[diffLookup.atlasFormat]; + group.format = diffLookup.atlasFormat; + + UINT baseVertex = static_cast(allVertices.size()); + UINT startIndex = static_cast(allIndices.size()); + + allVertices.insert( allVertices.end(), mi->Vertices.begin(), mi->Vertices.end() ); + 
allIndices.insert( allIndices.end(), mi->Indices.begin(), mi->Indices.end() ); + + WorldMeshSubmeshGPUData gpuData = {}; + gpuData.diffuseSlice = diffLookup.descriptor.slice; + gpuData.dUStart = diffLookup.descriptor.uStart; + gpuData.dVStart = diffLookup.descriptor.vStart; + gpuData.dUEnd = diffLookup.descriptor.uEnd; + gpuData.dVEnd = diffLookup.descriptor.vEnd; + + UINT flags = 0; + auto surface = tex->GetSurface(); + if ( surface ) { + D3D11Texture* normalTex = surface->GetNormalmap(); + if ( normalTex ) { + auto normIt = m_WorldMeshNormalAtlasLookup.find( normalTex ); + if ( normIt != m_WorldMeshNormalAtlasLookup.end() ) { + gpuData.normalSlice = normIt->second.descriptor.slice; + gpuData.nUStart = normIt->second.descriptor.uStart; + gpuData.nVStart = normIt->second.descriptor.vStart; + gpuData.nUEnd = normIt->second.descriptor.uEnd; + gpuData.nVEnd = normIt->second.descriptor.vEnd; + flags |= 1; // HAS_NORMAL + } + } + + D3D11Texture* fxTex = surface->GetFxMap(); + if ( fxTex ) { + auto fxIt = m_WorldMeshFxAtlasLookup.find( fxTex ); + if ( fxIt != m_WorldMeshFxAtlasLookup.end() ) { + gpuData.fxSlice = fxIt->second.descriptor.slice; + gpuData.fUStart = fxIt->second.descriptor.uStart; + gpuData.fVStart = fxIt->second.descriptor.vStart; + gpuData.fUEnd = fxIt->second.descriptor.uEnd; + gpuData.fVEnd = fxIt->second.descriptor.vEnd; + flags |= 2; // HAS_FX + } + } + } + + int alphaFunc = meshKey.Material->GetAlphaFunc(); + if ( alphaFunc == zMAT_ALPHA_FUNC_TEST || tex->HasAlphaChannel() ) + flags |= 4; // ALPHA_TEST + + gpuData.flags = flags; + + UINT submeshIndex = static_cast(submeshGPU.size()); + submeshGPU.push_back( gpuData ); + + D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS args = {}; + args.IndexCountPerInstance = static_cast(mi->Indices.size()); + args.InstanceCount = 1; + args.StartIndexLocation = startIndex; + args.BaseVertexLocation = static_cast(baseVertex); + args.StartInstanceLocation = submeshIndex; + group.indirectArgs.push_back( args ); + } + } + } 
+ + if ( allVertices.empty() ) { + LogWarn() << "D3D11MeshAtlasPass::BuildGeometryBuffers: No world mesh vertices for atlas"; + return; + } + + m_WorldMeshGlobalVertexBuffer = std::make_unique(); + m_WorldMeshGlobalVertexBuffer->Init( + allVertices.data(), + static_cast(allVertices.size() * sizeof( ExVertexStruct )), + D3D11VertexBuffer::B_VERTEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + m_WorldMeshGlobalIndexBuffer = std::make_unique(); + m_WorldMeshGlobalIndexBuffer->Init( + allIndices.data(), + static_cast(allIndices.size() * sizeof( VERTEX_INDEX )), + D3D11VertexBuffer::B_INDEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + UINT maxIds = static_cast(submeshGPU.size()); + if ( maxIds < 256 ) maxIds = 256; + std::vector instanceIds( maxIds ); + for ( uint32_t i = 0; i < maxIds; i++ ) + instanceIds[i] = i; + + m_WorldMeshGlobalInstanceIdBuffer = std::make_unique(); + m_WorldMeshGlobalInstanceIdBuffer->Init( + instanceIds.data(), + static_cast(instanceIds.size() * sizeof( uint32_t )), + D3D11VertexBuffer::B_VERTEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + auto* device = m_Engine->GetDevice().Get(); + auto* context = m_Engine->GetContext().Get(); + + m_WorldMeshSubmeshBuffer = std::make_unique>(); + m_WorldMeshSubmeshBuffer->Init( device, static_cast(submeshGPU.size()), false, false ); + m_WorldMeshSubmeshBuffer->UpdateBufferDefault( context, submeshGPU.data(), static_cast(submeshGPU.size()) ); + + m_WorldMeshAtlasDrawGroups.clear(); + for ( auto& [fmt, group] : groupsByFormat ) { + if ( group.indirectArgs.empty() ) + continue; + + UINT bufSize = static_cast(group.indirectArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); + group.indirectBuffer = std::make_unique(); + group.indirectBuffer->Init( + group.indirectArgs.data(), bufSize, + D3D11IndirectBuffer::B_VERTEXBUFFER, + D3D11IndirectBuffer::U_IMMUTABLE, + D3D11IndirectBuffer::CA_NONE ); + + 
m_WorldMeshAtlasDrawGroups.push_back( std::move( group ) ); + } + + LogInfo() << "World Mesh Atlas geometry: " << allVertices.size() << " vertices, " + << allIndices.size() << " indices, " + << m_WorldMeshAtlasDrawGroups.size() << " format groups, " + << submeshGPU.size() << " submeshes"; +} + +// ============================================================ +// Draw – per-frame indirect draw of atlased world mesh +// ============================================================ +XRESULT D3D11MeshAtlasPass::Draw() { + if ( m_WorldMeshAtlasDrawGroups.empty() || + !m_WorldMeshGlobalVertexBuffer || !m_WorldMeshGlobalIndexBuffer ) + return XR_SUCCESS; + + auto _ = m_Engine->RecordGraphicsEvent( L"DrawWorldMesh_Atlas" ); + auto& context = m_Engine->GetContext(); + + m_Engine->SetDefaultStates(); + + XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); + Engine::GAPI->SetViewTransformXM( view ); + Engine::GAPI->ResetWorldTransform(); + + context->DSSetShader( nullptr, nullptr, 0 ); + context->HSSetShader( nullptr, nullptr, 0 ); + + // --- Bind global geometry --- + UINT strides[2] = { sizeof( ExVertexStruct ), sizeof( uint32_t ) }; + UINT offsets[2] = { 0, 0 }; + ID3D11Buffer* vbs[2] = { + m_WorldMeshGlobalVertexBuffer->GetVertexBuffer().Get(), + m_WorldMeshGlobalInstanceIdBuffer->GetVertexBuffer().Get() + }; + context->IASetVertexBuffers( 0, 2, vbs, strides, offsets ); + context->IASetIndexBuffer( m_WorldMeshGlobalIndexBuffer->GetVertexBuffer().Get(), + VERTEX_INDEX_DXGI_FORMAT, 0 ); + context->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); + + // Submesh structured buffer -> VS t1 + ID3D11ShaderResourceView* submeshSRV = m_WorldMeshSubmeshBuffer->GetSRV(); + context->VSSetShaderResources( 1, 1, &submeshSRV ); + + // Vertex shader + m_Engine->SetActiveVertexShader( VShaderID::VS_ExWorldAtlas ); + m_Engine->SetupVS_ExMeshDrawCall(); + m_Engine->SetupVS_ExConstantBuffer(); + m_Engine->ActiveVS->Apply(); + + // Pixel shader + m_Engine->SetActivePixelShader( 
PShaderID::PS_WorldAtlas ); + + m_Engine->ActivePS->GetConstantBuffer()[0]->UpdateBuffer( + &Engine::GAPI->GetRendererState().GraphicsState ); + m_Engine->ActivePS->GetConstantBuffer()[0]->BindToPixelShader( 0 ); + + GSky* sky = Engine::GAPI->GetSky(); + m_Engine->ActivePS->GetConstantBuffer()[1]->UpdateBuffer( &sky->GetAtmosphereCB() ); + m_Engine->ActivePS->GetConstantBuffer()[1]->BindToPixelShader( 1 ); + + MaterialInfo defMaterial{}; + m_Engine->ActivePS->GetConstantBuffer()[2]->UpdateBuffer( &defMaterial.buffer ); + m_Engine->ActivePS->GetConstantBuffer()[2]->BindToPixelShader( 2 ); + + m_Engine->InfiniteRangeConstantBuffer->BindToPixelShader( 3 ); + + context->PSSetShaderResources( 4, 1, m_Engine->ReflectionCube.GetAddressOf() ); + + m_Engine->ActivePS->Apply(); + + // --- Draw per format group --- + for ( auto& group : m_WorldMeshAtlasDrawGroups ) { + ID3D11ShaderResourceView* diffuseSRV = m_WorldMeshDiffuseAtlasses[group.format].atlasSRV; + if ( !diffuseSRV ) continue; + + // Bind first available normal/fx atlases (format grouping is per-diffuse) + ID3D11ShaderResourceView* normalSRV = nullptr; + ID3D11ShaderResourceView* fxSRV = nullptr; + for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ ) { + if ( !normalSRV && m_WorldMeshNormalAtlasses[i].atlasSRV ) + normalSRV = m_WorldMeshNormalAtlasses[i].atlasSRV; + if ( !fxSRV && m_WorldMeshFxAtlasses[i].atlasSRV ) + fxSRV = m_WorldMeshFxAtlasses[i].atlasSRV; + } + + ID3D11ShaderResourceView* psSRVs[3] = { diffuseSRV, normalSRV, fxSRV }; + context->PSSetShaderResources( 0, 3, psSRVs ); + + DrawMultiIndexedInstancedIndirect( + context.Get(), + static_cast(group.indirectArgs.size()), + group.indirectBuffer->GetIndirectBuffer().Get(), + 0, + sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ) ); + } + + // Unbind submesh buffer from VS + ID3D11ShaderResourceView* nullSRV = nullptr; + context->VSSetShaderResources( 1, 1, &nullSRV ); + + return XR_SUCCESS; +} diff --git a/D3D11Engine/D3D11MeshAtlasPass.h 
b/D3D11Engine/D3D11MeshAtlasPass.h new file mode 100644 index 00000000..e8c4a2f0 --- /dev/null +++ b/D3D11Engine/D3D11MeshAtlasPass.h @@ -0,0 +1,83 @@ +#pragma once +#include "D3D11AtlasTypes.h" +#include "D3D11StructuredBuffer.h" +#include "D3D11VertexBuffer.h" +#include "D3D11ConstantBuffer.h" +#include "ConstantBufferStructs.h" + +#include +#include +#include +#include +#include +#include + +class D3D11GraphicsEngine; +class D3D11Texture; +class zCTexture; +struct MeshInfo; + +/** + * Encapsulates all texture-atlas-based GPU-driven rendering for world mesh geometry. + * + * Responsibilities: + * - Building per-format Texture2DArray atlases for world-mesh diffuse, normal, and FX textures + * - Building the merged global world-mesh VB/IB with per-submesh indirect-args buffers + * - Executing the multi-indirect draw of atlased world mesh geometry each frame + * + * The engine keeps one instance of this class. Call Build() when a new world + * is loaded (OnWorldLoaded), and Draw() every frame instead of the old + * DrawWorldMesh_Atlas(). + */ +class D3D11MeshAtlasPass { + friend class D3D11GraphicsEngine; +public: + explicit D3D11MeshAtlasPass( D3D11GraphicsEngine* engine ); + + /** (Re-)build atlases and geometry buffers. + * Called from D3D11GraphicsEngine::OnWorldLoaded(). */ + void Build(); + + /** Draw atlased world mesh geometry via multi-indirect. */ + XRESULT Draw(); + + /** True once Build() has completed and at least one draw group exists. */ + bool IsReady() const { return !m_WorldMeshAtlasDrawGroups.empty(); } + + /** Returns true if the given MeshInfo was atlased (used to skip it in the legacy path). */ + bool IsSubmeshAtlased( MeshInfo* mi ) const { + return m_WorldMeshAtlasedSubmeshes.count( mi ) != 0; + } + + /** Diffuse atlas lookup (read-only access for shadow passes). 
*/ + const std::unordered_map& GetDiffuseAtlasLookup() const { + return m_WorldMeshDiffuseAtlasLookup; + } + +private: + D3D11GraphicsEngine* m_Engine; + + // ---- Atlas textures (one array per texture type) ---- + std::unordered_map m_WorldMeshDiffuseAtlasLookup; + std::unordered_map m_WorldMeshNormalAtlasLookup; + std::unordered_map m_WorldMeshFxAtlasLookup; + + std::array m_WorldMeshDiffuseAtlasses{}; + std::array m_WorldMeshNormalAtlasses{}; + std::array m_WorldMeshFxAtlasses{}; + + // ---- Global geometry ---- + std::unique_ptr m_WorldMeshGlobalVertexBuffer; + std::unique_ptr m_WorldMeshGlobalIndexBuffer; + std::unique_ptr m_WorldMeshGlobalInstanceIdBuffer; + + // ---- GPU submesh descriptors ---- + std::unique_ptr> m_WorldMeshSubmeshBuffer; + + // ---- Draw groups ---- + std::vector m_WorldMeshAtlasDrawGroups; + std::unordered_set m_WorldMeshAtlasedSubmeshes; + + void BuildTextureAtlasses(); + void BuildGeometryBuffers(); +}; diff --git a/D3D11Engine/D3D11VobAtlasPass.cpp b/D3D11Engine/D3D11VobAtlasPass.cpp new file mode 100644 index 00000000..aedaeaba --- /dev/null +++ b/D3D11Engine/D3D11VobAtlasPass.cpp @@ -0,0 +1,608 @@ +#include "D3D11VobAtlasPass.h" +#include "D3D11GraphicsEngine.h" + +#include "D3D11ShaderManager.h" +#include "D3D11VShader.h" +#include "D3D11PShader.h" +#include "D3D11CShader.h" +#include "D3D11ConstantBuffer.h" +#include "GothicAPI.h" +#include "GSky.h" +#include "RenderToTextureBuffer.h" +#include "WorldObjects.h" +#include "VertexTypes.h" +#include "zCTexture.h" +#include "zCMaterial.h" +#include "zCVob.h" + +#include +#include + +// ----- globals defined in D3D11GraphicsEngine.cpp ----- +extern bool SupportTextureAtlases; +extern float vobAnimation_WindStrength; +namespace { + constexpr DXGI_FORMAT VERTEX_INDEX_DXGI_FORMAT = sizeof( VERTEX_INDEX ) == sizeof( unsigned short ) ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; +} + +typedef void( __cdecl* PFN_DRAWMULTIINDEXEDINSTANCEDINDIRECT )( + ID3D11DeviceContext* context, unsigned int drawCount, + ID3D11Buffer* buffer, unsigned int alignedByteOffsetForArgs, + unsigned int alignedByteStrideForArgs ); +extern PFN_DRAWMULTIINDEXEDINSTANCEDINDIRECT DrawMultiIndexedInstancedIndirect; + +// ------------------------------------------------------- + +D3D11VobAtlasPass::D3D11VobAtlasPass( D3D11GraphicsEngine* engine ) + : m_Engine( engine ) { +} + +// ============================================================ +// Build – entry point called from OnWorldLoaded +// ============================================================ +void D3D11VobAtlasPass::Build() { + // Reset everything + for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ ) + m_TextureAtlasses[(DXGI_FORMAT)i].Destroy(); + m_TextureAtlasLookup.clear(); + m_AtlasDrawGroups.clear(); + + if ( !SupportTextureAtlases || + !Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs ) { + return; + } + + BuildTextureAtlasses(); + + if ( m_TextureAtlasLookup.empty() ) + return; + + BuildGeometryBuffers(); + BuildGPUCullingBuffers(); +} + +// ============================================================ +// BuildTextureAtlasses +// ============================================================ +void D3D11VobAtlasPass::BuildTextureAtlasses() { + struct TextureInfo { + zCTexture* gothicTexture; + DXGI_FORMAT Format; + Microsoft::WRL::ComPtr Texture2D; + }; + + std::unordered_set seenTextures; + std::vector uniqueTextures; + + for ( auto vobInfo : m_Engine->m_StaticVobs ) { + for ( auto& byTex : reinterpret_cast(vobInfo->VisualInfo)->MeshesByTexture ) { + zCTexture* tex = byTex.first.Texture; + if ( !tex ) + tex = byTex.first.Material->GetTexture(); + if ( !tex ) + tex = byTex.first.Material->GetAniTexture(); + + if ( !tex || !seenTextures.insert( tex ).second ) { + LogError() << "Texture not found for visual " << 
vobInfo->VisualInfo->VisualName; + continue; + } + + auto cachedState = tex->CacheIn( -1 ); + if ( cachedState != zRES_CACHED_IN ) { + LogError() << "Texture " << tex->GetName() << " was not cached in"; + continue; + } + + auto surface = tex->GetSurface(); + if ( !surface || !surface->IsSurfaceReady() ) { + LogError() << "Texture " << tex->GetName() << " surface not ready"; + continue; + } + + auto engineTex = surface->GetEngineTexture(); + if ( !engineTex ) { + LogError() << "Texture " << tex->GetName() << " no engine texture"; + continue; + } + + D3D11_TEXTURE2D_DESC desc; + engineTex->GetTextureObject()->GetDesc( &desc ); + if ( desc.Format < 1 || desc.Format >= TEXTURE_ATLAS_MAX ) { + LogError() << "Texture " << tex->GetName() << " has unsupported format for atlas: " << desc.Format; + continue; + } + uniqueTextures.push_back( { tex, desc.Format, engineTex->GetTextureObject() } ); + } + } + + // Sort by format so same-format textures are contiguous + std::sort( uniqueTextures.begin(), uniqueTextures.end(), + []( const TextureInfo& a, const TextureInfo& b ) { return a.Format < b.Format; } ); + + // Create one Texture2DArray atlas per contiguous format range + size_t rangeStart = 0; + while ( rangeStart < uniqueTextures.size() ) { + DXGI_FORMAT fmt = uniqueTextures[rangeStart].Format; + size_t rangeEnd = rangeStart; + while ( rangeEnd < uniqueTextures.size() && uniqueTextures[rangeEnd].Format == fmt ) + rangeEnd++; + + std::vector texPtrs; + texPtrs.reserve( rangeEnd - rangeStart ); + for ( size_t i = rangeStart; i < rangeEnd; i++ ) + texPtrs.push_back( uniqueTextures[i].Texture2D.Get() ); + + std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); + TextureManager::AtlasResult atlas = TextureManager::CreateAtlasArray( + m_Engine->GetDevice().Get(), m_Engine->GetContext().Get(), txView, 2048, 6 ); + + for ( size_t i = 0; i < texPtrs.size(); i++ ) { + m_TextureAtlasLookup[uniqueTextures[rangeStart + i].gothicTexture] = { + fmt, atlas.descriptors[i] + }; + 
} + m_TextureAtlasses[fmt] = atlas; + rangeStart = rangeEnd; + } + + LogInfo() << "VOB Atlas: " << uniqueTextures.size() << " unique textures, " + << m_TextureAtlasLookup.size() << " mapped"; +} + +// ============================================================ +// BuildGeometryBuffers +// ============================================================ +void D3D11VobAtlasPass::BuildGeometryBuffers() { + std::vector allVertices; + std::vector allIndices; + std::map groupsByFormat; + std::unordered_set processedMeshes; + + // Pre-count to avoid incremental reallocation + { + size_t totalVertices = 0, totalIndices = 0; + std::unordered_set counted; + for ( auto const& [proto, visual] : Engine::GAPI->GetStaticMeshVisuals() ) { + for ( auto const& [meshKey, meshList] : visual->MeshesByTexture ) { + if ( m_TextureAtlasLookup.find( meshKey.Texture ) == m_TextureAtlasLookup.end() ) + continue; + for ( MeshInfo* mi : meshList ) { + if ( counted.insert( mi ).second ) { + totalVertices += mi->Vertices.size(); + totalIndices += mi->Indices.size(); + } + } + } + } + allVertices.reserve( totalVertices ); + allIndices.reserve( totalIndices ); + } + + for ( auto const& [proto, visual] : Engine::GAPI->GetStaticMeshVisuals() ) { + for ( auto const& [meshKey, meshList] : visual->MeshesByTexture ) { + auto it = m_TextureAtlasLookup.find( meshKey.Texture ); + if ( it == m_TextureAtlasLookup.end() ) + continue; + + const TextureAtlasLookup& lookup = it->second; + auto& group = groupsByFormat[lookup.atlasFormat]; + group.format = lookup.atlasFormat; + + for ( MeshInfo* mi : meshList ) { + if ( !processedMeshes.insert( mi ).second ) + continue; + + UINT baseVertex = static_cast(allVertices.size()); + UINT startIndex = static_cast(allIndices.size()); + + allVertices.insert( allVertices.end(), mi->Vertices.begin(), mi->Vertices.end() ); + allIndices.insert( allIndices.end(), mi->Indices.begin(), mi->Indices.end() ); + + StaticSubmeshEntry entry; + entry.indexCount = 
static_cast(mi->Indices.size()); + entry.startIndexLocation = startIndex; + entry.baseVertexLocation = static_cast(baseVertex); + entry.atlasDesc = lookup.descriptor; + entry.visual = visual; + group.submeshes.push_back( entry ); + + D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS args = {}; + args.IndexCountPerInstance = entry.indexCount; + args.InstanceCount = 0; + args.StartIndexLocation = entry.startIndexLocation; + args.BaseVertexLocation = entry.baseVertexLocation; + args.StartInstanceLocation = 0; + group.indirectArgs.push_back( args ); + } + } + } + + if ( allVertices.empty() ) { + LogWarn() << "D3D11VobAtlasPass::BuildGeometryBuffers: No vertices to process"; + return; + } + + m_StaticGlobalVertexBuffer = std::make_unique(); + m_StaticGlobalVertexBuffer->Init( + allVertices.data(), + static_cast(allVertices.size() * sizeof( ExVertexStruct )), + D3D11VertexBuffer::B_VERTEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + m_StaticGlobalIndexBuffer = std::make_unique(); + m_StaticGlobalIndexBuffer->Init( + allIndices.data(), + static_cast(allIndices.size() * sizeof( VERTEX_INDEX )), + D3D11VertexBuffer::B_INDEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + UINT maxInstanceIds = static_cast(m_Engine->m_StaticVobs.size() * 4); + if ( maxInstanceIds < 4096 ) + maxInstanceIds = 4096; + std::vector instanceIds( maxInstanceIds ); + for ( uint32_t i = 0; i < maxInstanceIds; i++ ) + instanceIds[i] = i; + + m_GlobalInstanceIdBuffer = std::make_unique(); + m_GlobalInstanceIdBuffer->Init( + instanceIds.data(), + static_cast(instanceIds.size() * sizeof( uint32_t )), + D3D11VertexBuffer::B_VERTEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + m_AtlasDrawGroups.clear(); + for ( auto& [fmt, group] : groupsByFormat ) { + if ( group.indirectArgs.empty() ) + continue; + + UINT bufSize = static_cast(group.indirectArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); + 
group.indirectBuffer = std::make_unique(); + group.indirectBuffer->Init( + group.indirectArgs.data(), bufSize, + D3D11IndirectBuffer::B_VERTEXBUFFER, + D3D11IndirectBuffer::U_DYNAMIC, + D3D11IndirectBuffer::CA_WRITE ); + + m_AtlasDrawGroups.push_back( std::move( group ) ); + } + + LogInfo() << "VOB Atlas geometry: " << allVertices.size() << " vertices, " + << allIndices.size() << " indices, " + << m_AtlasDrawGroups.size() << " atlas groups, " + << processedMeshes.size() << " unique submeshes"; +} + +// ============================================================ +// BuildGPUCullingBuffers +// ============================================================ +void D3D11VobAtlasPass::BuildGPUCullingBuffers() { + if ( m_AtlasDrawGroups.empty() || m_Engine->m_StaticVobs.empty() ) + return; + + // --- 1. Build visual -> vob-count mapping --- + std::unordered_map vobsPerVisual; + std::unordered_map> vobIndicesByVisual; + + for ( size_t i = 0; i < m_Engine->m_StaticVobs.size(); i++ ) { + auto* visual = reinterpret_cast(m_Engine->m_StaticVobs[i]->VisualInfo); + vobsPerVisual[visual]++; + vobIndicesByVisual[visual].push_back( i ); + } + + // --- 2. Build merged indirect args + SubmeshGPUData --- + std::vector mergedArgs; + std::unordered_map> visualSubmeshMap; + + UINT runningInstanceOffset = 0; + UINT globalArgIndex = 0; + + for ( auto& group : m_AtlasDrawGroups ) { + group.mergedArgsOffset = static_cast(mergedArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); + group.mergedArgsCount = static_cast(group.indirectArgs.size()); + + for ( size_t si = 0; si < group.submeshes.size(); si++ ) { + const auto& submesh = group.submeshes[si]; + MeshVisualInfo* visual = submesh.visual; + UINT maxInstances = vobsPerVisual.count( visual ) ? 
vobsPerVisual[visual] : 0; + + D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS args = {}; + args.IndexCountPerInstance = submesh.indexCount; + args.InstanceCount = 0; + args.StartIndexLocation = submesh.startIndexLocation; + args.BaseVertexLocation = submesh.baseVertexLocation; + args.StartInstanceLocation = runningInstanceOffset; + mergedArgs.push_back( args ); + + SubmeshGPUData smGPU = {}; + smGPU.slice = submesh.atlasDesc.slice; + smGPU.uStart = submesh.atlasDesc.uStart; + smGPU.vStart = submesh.atlasDesc.vStart; + smGPU.uEnd = submesh.atlasDesc.uEnd; + smGPU.vEnd = submesh.atlasDesc.vEnd; + smGPU.argIndex = globalArgIndex; + smGPU.instanceBaseOffset = runningInstanceOffset; + smGPU.globalSourceIndex = 0; + + visualSubmeshMap[visual].push_back( smGPU ); + + runningInstanceOffset += maxInstances; + globalArgIndex++; + } + } + + m_TotalMaxInstances = runningInstanceOffset; + + // --- 3. Flatten per-visual submesh entries --- + struct VisualSubmeshRange { UINT start; UINT count; }; + std::unordered_map visualSubmeshRanges; + std::vector submeshGPU; + + for ( auto& [visual, entries] : visualSubmeshMap ) { + UINT start = static_cast(submeshGPU.size()); + for ( auto& entry : entries ) + submeshGPU.push_back( entry ); + visualSubmeshRanges[visual] = { start, static_cast(entries.size()) }; + } + + // --- 4. 
Build VobGPUData --- + std::vector vobGPU; + vobGPU.reserve( m_Engine->m_StaticVobs.size() ); + + for ( size_t i = 0; i < m_Engine->m_StaticVobs.size(); i++ ) { + VobInfo* v = m_Engine->m_StaticVobs[i]; + auto* visual = reinterpret_cast(v->VisualInfo); + + VobGPUData data = {}; + DirectX::BoundingBox bb = Frustum::BBoxFromzTBBox3D( v->Vob->GetBBox() ); + data.aabbCenter = bb.Center; + data.aabbExtent = bb.Extents; + data.world = v->WorldMatrix; + data.prevWorld = v->WorldMatrix; + data.color = v->GroundColor; + + zTAnimationMode aniMode = v->Vob->GetVisualAniMode(); + if ( aniMode != zVISUAL_ANIMODE_NONE ) { + data.aniModeStrength = v->Vob->GetVisualAniModeStrength(); + data.canBeAffectedByPlayer = (!v->Vob->GetDynColl() ? 1.0f : 0.0f); + } else { + data.aniModeStrength = 0.0f; + data.canBeAffectedByPlayer = 0.0f; + } + + auto it = visualSubmeshRanges.find( visual ); + if ( it != visualSubmeshRanges.end() ) { + data.submeshStart = it->second.start; + data.submeshCount = it->second.count; + } + vobGPU.push_back( data ); + } + + // --- 5. 
Upload to GPU --- + auto* device = m_Engine->GetDevice().Get(); + auto* context = m_Engine->GetContext().Get(); + + m_VobGPUBuffer = std::make_unique>(); + m_VobGPUBuffer->Init( device, static_cast(vobGPU.size()), false, false ); + m_VobGPUBuffer->UpdateBufferDefault( context, vobGPU.data(), static_cast(vobGPU.size()) ); + + m_SubmeshGPUBuffer = std::make_unique>(); + m_SubmeshGPUBuffer->Init( device, static_cast(submeshGPU.size()), false, false ); + m_SubmeshGPUBuffer->UpdateBufferDefault( context, submeshGPU.data(), static_cast(submeshGPU.size()) ); + + UINT instanceCapacity = std::max( m_TotalMaxInstances, 1u ); + m_InstanceBufferGPU = std::make_unique>(); + m_InstanceBufferGPU->Init( device, instanceCapacity, false, true ); + + UINT argsSize = static_cast(mergedArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); + m_MergedIndirectArgs = std::make_unique(); + m_MergedIndirectArgs->Init( + mergedArgs.data(), argsSize, + D3D11IndirectBuffer::B_UNORDERED_ACCESS, + D3D11IndirectBuffer::U_DEFAULT, + D3D11IndirectBuffer::CA_NONE ); + + m_MergedArgsReset = mergedArgs; + + D3D11_BUFFER_DESC templateDesc = {}; + templateDesc.ByteWidth = argsSize; + templateDesc.Usage = D3D11_USAGE_DEFAULT; + templateDesc.BindFlags = 0; + templateDesc.MiscFlags = D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS; + + D3D11_SUBRESOURCE_DATA templateData = {}; + templateData.pSysMem = mergedArgs.data(); + device->CreateBuffer( &templateDesc, &templateData, m_IndirectArgsTemplate.ReleaseAndGetAddressOf() ); + + CullConstants initCB = {}; + m_CullConstantBuffer = std::make_unique( sizeof( CullConstants ), &initCB ); + + if ( m_TotalMaxInstances > 0 ) { + std::vector instanceIds( m_TotalMaxInstances ); + for ( uint32_t i = 0; i < m_TotalMaxInstances; i++ ) + instanceIds[i] = i; + + m_GlobalInstanceIdBuffer = std::make_unique(); + m_GlobalInstanceIdBuffer->Init( + instanceIds.data(), + static_cast(instanceIds.size() * sizeof( uint32_t )), + D3D11VertexBuffer::B_VERTEXBUFFER, + 
D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + } + + LogInfo() << "VOB Atlas GPU Culling: " << vobGPU.size() << " vobs, " + << submeshGPU.size() << " submesh entries, " + << mergedArgs.size() << " indirect args, " + << m_TotalMaxInstances << " max instances"; +} + +// ============================================================ +// Draw – per-frame GPU-cull + indirect draw +// ============================================================ +XRESULT D3D11VobAtlasPass::Draw( const Frustum& frustum, bool bindPS ) { + if ( m_AtlasDrawGroups.empty() || !m_VobGPUBuffer || + !m_StaticGlobalVertexBuffer || !m_StaticGlobalIndexBuffer ) + return XR_SUCCESS; + + auto _ = m_Engine->RecordGraphicsEvent( L"DrawVOBsIndirect" ); + auto& context = m_Engine->GetContext(); + + // --- 0. Build Hi-Z pyramid for occlusion culling (main pass only) --- + const bool useHiZ = bindPS && m_Engine->m_HiZTexture && m_Engine->m_HiZSRV; + if ( useHiZ ) { + m_Engine->CopyDepthStencil(); + m_Engine->BuildHiZPyramid(); + } + + // --- 1. Reset indirect args InstanceCounts --- + context->CopyResource( m_MergedIndirectArgs->GetIndirectBuffer().Get(), + m_IndirectArgsTemplate.Get() ); + + // --- 2. Update cull constant buffer --- + CullConstants cb = {}; + memcpy( cb.frustumPlanes, frustum.GetPlanes().data(), 6 * sizeof( XMFLOAT4 ) ); + cb.cameraPosition = Engine::GAPI->GetCameraPosition(); + cb.drawDistance = Engine::GAPI->GetRendererState().RendererSettings.OutdoorVobDrawRadius; + cb.globalWindStrength = vobAnimation_WindStrength; + cb.windAdvanced = (Engine::GAPI->GetRendererState().RendererSettings.WindQuality + == GothicRendererSettings::EWindQuality::WIND_QUALITY_ADVANCED) ? 
1 : 0; + cb.numVobs = static_cast(m_Engine->m_StaticVobs.size()); + cb.feedbackFrameNumber = 0; + + if ( useHiZ ) { + cb.enableHiZ = 1; + cb.hiZMipCount = m_Engine->m_HiZMipCount; + cb.hiZWidth = static_cast(m_Engine->DepthStencilBuffer->GetSizeX()); + cb.hiZHeight = static_cast(m_Engine->DepthStencilBuffer->GetSizeY()); + + XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); + auto& projF = Engine::GAPI->GetProjectionMatrix(); + XMStoreFloat4x4( &cb.viewProjection, XMMatrixMultiply( view, XMLoadFloat4x4( &projF ) ) ); + } else { + cb.enableHiZ = 0; + cb.hiZMipCount = 0; + cb.hiZWidth = 0.0f; + cb.hiZHeight = 0.0f; + XMStoreFloat4x4( &cb.viewProjection, XMMatrixIdentity() ); + } + + m_CullConstantBuffer->UpdateBuffer( &cb ); + m_CullConstantBuffer->BindToComputeShader( 0 ); + + // --- 3. Dispatch CS_CullVobs --- + auto cullCS = m_Engine->ShaderManager->GetCShader( CShaderID::CS_CullVobs ); + if ( !cullCS ) + return XR_SUCCESS; + cullCS->Apply(); + + ID3D11ShaderResourceView* srvs[2] = { + m_VobGPUBuffer->GetSRV(), + m_SubmeshGPUBuffer->GetSRV() + }; + context->CSSetShaderResources( 0, 2, srvs ); + + if ( useHiZ ) { + ID3D11ShaderResourceView* hiZSRV = m_Engine->m_HiZSRV.Get(); + context->CSSetShaderResources( 2, 1, &hiZSRV ); + } + + ID3D11UnorderedAccessView* uavs[2] = { + m_InstanceBufferGPU->GetUAV(), + m_MergedIndirectArgs->GetUnorderedAccessView().Get() + }; + context->CSSetUnorderedAccessViews( 0, 2, uavs, nullptr ); + + UINT numGroups = (static_cast(m_Engine->m_StaticVobs.size()) + 63) / 64; + context->Dispatch( numGroups, 1, 1 ); + + // Unbind CS resources + ID3D11ShaderResourceView* nullSRV[3] = { nullptr, nullptr, nullptr }; + ID3D11UnorderedAccessView* nullUAV[2] = { nullptr, nullptr }; + context->CSSetShaderResources( 0, 3, nullSRV ); + context->CSSetUnorderedAccessViews( 0, 2, nullUAV, nullptr ); + context->CSSetShader( nullptr, nullptr, 0 ); + + // --- 4. 
Bind global geometry --- + UINT strides[2] = { sizeof( ExVertexStruct ), sizeof( uint32_t ) }; + UINT offsets[2] = { 0, 0 }; + ID3D11Buffer* vbs[2] = { + m_StaticGlobalVertexBuffer->GetVertexBuffer().Get(), + m_GlobalInstanceIdBuffer->GetVertexBuffer().Get() + }; + context->IASetVertexBuffers( 0, 2, vbs, strides, offsets ); + context->IASetIndexBuffer( m_StaticGlobalIndexBuffer->GetVertexBuffer().Get(), + VERTEX_INDEX_DXGI_FORMAT, 0 ); + context->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); + + // --- 5. Bind instance structured buffer to VS t1 --- + ID3D11ShaderResourceView* instSRV = m_InstanceBufferGPU->GetSRV(); + context->VSSetShaderResources( 1, 1, &instSRV ); + + // --- 6. Set vertex shader --- + m_Engine->SetActiveVertexShader( VShaderID::VS_ExInstancedObjIndirectAtlas ); + m_Engine->SetupVS_ExMeshDrawCall(); + m_Engine->SetupVS_ExConstantBuffer(); + + VS_ExConstantBuffer_Wind windBuff{}; + m_Engine->ApplyWindProps( windBuff ); + m_Engine->ActiveVS->GetConstantBuffer()[1]->UpdateBuffer( &windBuff ); + m_Engine->ActiveVS->GetConstantBuffer()[1]->BindToVertexShader( 1 ); + + if ( bindPS ) + context->PSSetShaderResources( 4, 1, m_Engine->ReflectionCube.GetAddressOf() ); + + m_Engine->ActiveVS->Apply(); + + // --- 7. 
Draw per atlas group --- + MaterialInfo defMaterial{}; + GSky* sky = Engine::GAPI->GetSky(); + + for ( auto& group : m_AtlasDrawGroups ) { + ID3D11ShaderResourceView* srv = m_TextureAtlasses[group.format].atlasSRV; + if ( !srv ) + continue; + + const bool needsPS = bindPS || (group.format == DXGI_FORMAT_BC2_UNORM); + + if ( needsPS ) { + context->PSSetShaderResources( 0, 1, &srv ); + + if ( bindPS && group.format != DXGI_FORMAT_BC2_UNORM ) + m_Engine->SetActivePixelShader( PShaderID::PS_DiffuseAtlas ); + else + m_Engine->SetActivePixelShader( PShaderID::PS_DiffuseAtlasAlphaTest ); + + m_Engine->ActivePS->GetConstantBuffer()[0]->UpdateBuffer( + &Engine::GAPI->GetRendererState().GraphicsState ); + m_Engine->ActivePS->GetConstantBuffer()[0]->BindToPixelShader( 0 ); + + m_Engine->ActivePS->GetConstantBuffer()[1]->UpdateBuffer( &sky->GetAtmosphereCB() ); + m_Engine->ActivePS->GetConstantBuffer()[1]->BindToPixelShader( 1 ); + + m_Engine->ActivePS->GetConstantBuffer()[2]->UpdateBuffer( &defMaterial.buffer ); + m_Engine->ActivePS->GetConstantBuffer()[2]->BindToPixelShader( 2 ); + + m_Engine->OutdoorVobsConstantBuffer->BindToPixelShader( 3 ); + + m_Engine->ActivePS->Apply(); + } else { + context->PSSetShader( nullptr, nullptr, 0 ); + } + + DrawMultiIndexedInstancedIndirect( + context.Get(), + group.mergedArgsCount, + m_MergedIndirectArgs->GetIndirectBuffer().Get(), + group.mergedArgsOffset, + sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ) ); + } + + // Unbind instance buffer from VS + ID3D11ShaderResourceView* nullVSSRV = nullptr; + context->VSSetShaderResources( 1, 1, &nullVSSRV ); + + return XR_SUCCESS; +} diff --git a/D3D11Engine/D3D11VobAtlasPass.h b/D3D11Engine/D3D11VobAtlasPass.h new file mode 100644 index 00000000..5c965261 --- /dev/null +++ b/D3D11Engine/D3D11VobAtlasPass.h @@ -0,0 +1,81 @@ +#pragma once +#include "D3D11AtlasTypes.h" +#include "D3D11StructuredBuffer.h" +#include "D3D11VertexBuffer.h" +#include "D3D11ConstantBuffer.h" +#include "VobCulling.h" + 
+#include +#include +#include +#include +#include + +class D3D11GraphicsEngine; +class Frustum; +class zCTexture; + +/** + * Encapsulates all texture-atlas-based GPU-driven rendering for static VOBs. + * + * Responsibilities: + * - Building per-format Texture2DArray atlases from static-VOB diffuse textures + * - Building the merged global VB/IB and per-submesh indirect-args buffer + * - Building the GPU structured buffers used by CS_CullVobs + * - Executing the GPU-culling compute pass and the subsequent indirect draw + * + * The engine keeps one instance of this class. Call Build() when a new world + * is loaded (OnWorldLoaded), and Draw() every frame in place of the old + * DrawVOBsIndirect(). + */ +class D3D11VobAtlasPass { + friend class D3D11GraphicsEngine; +public: + explicit D3D11VobAtlasPass( D3D11GraphicsEngine* engine ); + + /** (Re-)build atlases, geometry buffers, and GPU culling buffers. + * Called from D3D11GraphicsEngine::OnWorldLoaded(). */ + void Build(); + + /** GPU-cull static VOBs and draw them with indirect multi-draw. + * bindPS=false is used in shadow passes to skip the pixel shader. */ + XRESULT Draw( const Frustum& frustum, bool bindPS = true ); + + /** True once Build() has completed and at least one draw group exists. */ + bool IsReady() const { return !m_AtlasDrawGroups.empty(); } + + /** Atlas lookup (read-only access for other systems if needed). 
*/ + const std::unordered_map& GetAtlasLookup() const { + return m_TextureAtlasLookup; + } + +private: + D3D11GraphicsEngine* m_Engine; + + // ---- Atlas textures ---- + std::array m_TextureAtlasses{}; + std::unordered_map m_TextureAtlasLookup; + + // ---- Global geometry ---- + std::unique_ptr m_StaticGlobalVertexBuffer; + std::unique_ptr m_StaticGlobalIndexBuffer; + std::unique_ptr m_GlobalInstanceIdBuffer; + std::vector m_AtlasDrawGroups; + + // (legacy slot – not yet used but reserved for future streaming) + std::unique_ptr> m_StaticVobInstanceBuffer; + + // ---- GPU culling buffers ---- + std::unique_ptr> m_VobGPUBuffer; + std::unique_ptr> m_SubmeshGPUBuffer; + std::unique_ptr> m_InstanceBufferGPU; + std::unique_ptr m_MergedIndirectArgs; + Microsoft::WRL::ComPtr m_IndirectArgsTemplate; + std::unique_ptr m_CullConstantBuffer; + std::vector m_MergedArgsReset; + UINT m_TotalMaxInstances = 0; + + void BuildTextureAtlasses(); + void BuildGeometryBuffers(); + void BuildGPUCullingBuffers(); +}; From 8a59521bb9af6232d26514d90f1f2c42cc6d8d56 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sun, 15 Mar 2026 13:13:07 +0100 Subject: [PATCH 24/42] Extract atlas code to different passes --- D3D11Engine/D3D11GraphicsEngine.h | 81 +++---------------------------- 1 file changed, 8 insertions(+), 73 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.h b/D3D11Engine/D3D11GraphicsEngine.h index 6a68d3e6..12ca687e 100644 --- a/D3D11Engine/D3D11GraphicsEngine.h +++ b/D3D11Engine/D3D11GraphicsEngine.h @@ -5,11 +5,11 @@ #include "D3D11ShadowMap.h" #include "D3D11ShaderManager.h" #include "D3D11PipelineStateObject.h" -#include "D3D11TextureAtlasManager.h" -#include "D3D11StructuredBuffer.h" #include "D3D11IndirectBuffer.h" #include "D3D11StreamingResourcesManager.h" #include "VobCulling.h" +#include "D3D11VobAtlasPass.h" +#include "D3D11MeshAtlasPass.h" struct RenderToDepthStencilBuffer; @@ -36,7 +36,6 @@ const unsigned int 
MORPHEDMESH_HIGH_BUFFER_SIZE = 20480 * sizeof( ExVertexStruct const unsigned int HUD_BUFFER_SIZE = 6 * sizeof( ExVertexStruct ); const int NUM_MAX_BONES = 96; const int unsigned INSTANCING_BUFFER_SIZE = sizeof( VobInstanceInfo ) * 2048; -constexpr size_t TEXTURE_ATLAS_MAX = DXGI_FORMAT_V408 + 1; class D3D11PointLight; @@ -61,26 +60,9 @@ struct AlphaMeshData { std::vector instances; }; -// Tracks one unique submesh in the global geometry buffer -struct StaticSubmeshEntry { - UINT indexCount; - UINT startIndexLocation; // offset into global IB - int baseVertexLocation; // offset into global VB - TextureDescriptor atlasDesc; - MeshVisualInfo* visual; // which visual owns this submesh -}; - -// Groups all submeshes that share one atlas (same DXGI_FORMAT) -struct AtlasDrawGroup { - DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; - std::vector submeshes; - std::vector indirectArgs; - std::unique_ptr indirectBuffer; - UINT mergedArgsOffset = 0; // byte offset into merged indirect args buffer - UINT mergedArgsCount = 0; // number of args in this group -}; - class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { + friend class D3D11VobAtlasPass; + friend class D3D11MeshAtlasPass; public: D3D11GraphicsEngine(); ~D3D11GraphicsEngine() override; @@ -277,15 +259,6 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { XRESULT DrawVOBsInstanced(); XRESULT DrawFrameAlphaMeshes(); - /** Draws static vobs using atlas indirect path */ - XRESULT DrawVOBsIndirect( const Frustum& frustum, bool bindPS = true ); - - /** Builds global VB/IB and indirect args from atlas data (called from OnWorldLoaded) */ - void BuildStaticGeometryBuffers(); - - /** Builds GPU data for compute shader culling (called after BuildStaticGeometryBuffers) */ - void BuildGPUCullingBuffers(); - /** Set wind props in const buffer */ void ApplyWindProps( VS_ExConstantBuffer_Wind& windBuff ); @@ -403,15 +376,6 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { void StorePrevViewProjMatrix(); - 
void BuildSceneTextureAtlasses(); - - /** World mesh atlas: collect textures, build atlases, merge geometry */ - void BuildWorldMeshTextureAtlasses(); - void BuildStaticWorldMeshBuffers(); - - /** Draw world mesh using atlas indirect path */ - XRESULT DrawWorldMesh_Atlas(); - void CacheWorldStaticVobs(); /** Pipeline state cache for minimizing redundant D3D11 state transitions */ @@ -535,39 +499,10 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { std::vector m_StaticVobs{}; std::vector m_StaticVobsAABBs{}; - std::array m_TextureAtlasses{}; - - /** Atlas indirect draw path */ - std::unordered_map m_TextureAtlasLookup; - std::unique_ptr m_StaticGlobalVertexBuffer; - std::unique_ptr m_StaticGlobalIndexBuffer; - std::unique_ptr m_GlobalInstanceIdBuffer; - std::vector m_AtlasDrawGroups; - std::unique_ptr> m_StaticVobInstanceBuffer; - - /** GPU culling buffers (created once at world load) */ - std::unique_ptr> m_VobGPUBuffer; - std::unique_ptr> m_SubmeshGPUBuffer; - std::unique_ptr> m_InstanceBufferGPU; - std::unique_ptr m_MergedIndirectArgs; - Microsoft::WRL::ComPtr m_IndirectArgsTemplate; - std::unique_ptr m_CullConstantBuffer; - std::vector m_MergedArgsReset; // CPU-side template for reset - UINT m_TotalMaxInstances = 0; - - /** World mesh atlas indirect draw path */ - std::unordered_map m_WorldMeshDiffuseAtlasLookup; - std::unordered_map m_WorldMeshNormalAtlasLookup; - std::unordered_map m_WorldMeshFxAtlasLookup; - std::array m_WorldMeshDiffuseAtlasses{}; - std::array m_WorldMeshNormalAtlasses{}; - std::array m_WorldMeshFxAtlasses{}; - std::unique_ptr m_WorldMeshGlobalVertexBuffer; - std::unique_ptr m_WorldMeshGlobalIndexBuffer; - std::unique_ptr m_WorldMeshGlobalInstanceIdBuffer; - std::unique_ptr> m_WorldMeshSubmeshBuffer; - std::vector m_WorldMeshAtlasDrawGroups; - std::unordered_set m_WorldMeshAtlasedSubmeshes; // submeshes in the atlas (for legacy filter) + + /** Atlas rendering passes */ + std::unique_ptr m_VobAtlasPass; + std::unique_ptr 
m_MeshAtlasPass; /** Hi-Z occlusion culling resources */ Microsoft::WRL::ComPtr m_HiZTexture; // Full mip-chain, SRV-only From 98fdea5a923249c12c0fca0d3bcf0d336206dc07 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sun, 15 Mar 2026 13:36:37 +0100 Subject: [PATCH 25/42] more debugging --- D3D11Engine/D3D11VobAtlasPass.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/D3D11Engine/D3D11VobAtlasPass.cpp b/D3D11Engine/D3D11VobAtlasPass.cpp index aedaeaba..0a3e62b9 100644 --- a/D3D11Engine/D3D11VobAtlasPass.cpp +++ b/D3D11Engine/D3D11VobAtlasPass.cpp @@ -14,6 +14,7 @@ #include "zCTexture.h" #include "zCMaterial.h" #include "zCVob.h" +#include "zCVisual.h" #include #include @@ -76,14 +77,18 @@ void D3D11VobAtlasPass::BuildTextureAtlasses() { for ( auto vobInfo : m_Engine->m_StaticVobs ) { for ( auto& byTex : reinterpret_cast(vobInfo->VisualInfo)->MeshesByTexture ) { - zCTexture* tex = byTex.first.Texture; - if ( !tex ) - tex = byTex.first.Material->GetTexture(); - if ( !tex ) - tex = byTex.first.Material->GetAniTexture(); - - if ( !tex || !seenTextures.insert( tex ).second ) { - LogError() << "Texture not found for visual " << vobInfo->VisualInfo->VisualName; + zCTexture* tex = byTex.first.Material->GetTexture(); + + if ( !tex ) { + auto vis = reinterpret_cast(vobInfo->VisualInfo)->Visual; + LogError() + << "Texture not found for visual " << vobInfo->VisualInfo->VisualName + << " Visual Type: " << vis->GetVisualType(); + + continue; + } + + if ( !seenTextures.insert( tex ).second ) { continue; } From f0c935f6b88de7d6d2b0f304000e67a06db61259 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sun, 15 Mar 2026 13:39:31 +0100 Subject: [PATCH 26/42] for now disable VOB atlassing, as that leads to missing textures for certain vobs, like the doors in irdorath or some things in pirates camp. 
--- D3D11Engine/D3D11GraphicsEngine.cpp | 5 ++++- D3D11Engine/GothicGraphicsState.h | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index c4857428..39d142ee 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -611,8 +611,11 @@ XRESULT D3D11GraphicsEngine::Init() { // to support more memory intensive features, even on less than 4GB cards, by streaming in the necessary tiles. SupportTextureAtlases = true; Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows = SupportTextureAtlases; - Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs = SupportTextureAtlases; Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh = SupportTextureAtlases; + + // VOB atlas is currently bugged, due to some vobs not getting their correct textures, + // likely due to being "animated" and at world load no animation has happened yet. 
+ Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs = false; } } diff --git a/D3D11Engine/GothicGraphicsState.h b/D3D11Engine/GothicGraphicsState.h index 2d3ee31d..044b3614 100644 --- a/D3D11Engine/GothicGraphicsState.h +++ b/D3D11Engine/GothicGraphicsState.h @@ -960,7 +960,6 @@ struct GothicRendererSettings { bool UseLayeredRendering; bool UseShadowAtlas; bool ForceFeatureLevel10; - bool StreamingResourcesSupported; bool EnableAtlasStaticVobs; bool EnableAtlasWorldMesh; } FeatureSet; From 3554b04c70cddb31bdcadfdcc5e9275f243fb2c8 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sun, 15 Mar 2026 13:52:15 +0100 Subject: [PATCH 27/42] atlas: reserve indirect args vectors --- D3D11Engine/D3D11VobAtlasPass.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/D3D11Engine/D3D11VobAtlasPass.cpp b/D3D11Engine/D3D11VobAtlasPass.cpp index 0a3e62b9..a5e6fbea 100644 --- a/D3D11Engine/D3D11VobAtlasPass.cpp +++ b/D3D11Engine/D3D11VobAtlasPass.cpp @@ -300,6 +300,11 @@ void D3D11VobAtlasPass::BuildGPUCullingBuffers() { // --- 2. 
Build merged indirect args + SubmeshGPUData --- std::vector mergedArgs; std::unordered_map> visualSubmeshMap; + { + size_t totalSubmeshes = 0; + for ( const auto& group : m_AtlasDrawGroups ) totalSubmeshes += group.submeshes.size(); + mergedArgs.reserve( totalSubmeshes ); + } UINT runningInstanceOffset = 0; UINT globalArgIndex = 0; @@ -344,11 +349,11 @@ void D3D11VobAtlasPass::BuildGPUCullingBuffers() { struct VisualSubmeshRange { UINT start; UINT count; }; std::unordered_map visualSubmeshRanges; std::vector submeshGPU; + submeshGPU.reserve( mergedArgs.size() ); for ( auto& [visual, entries] : visualSubmeshMap ) { UINT start = static_cast(submeshGPU.size()); - for ( auto& entry : entries ) - submeshGPU.push_back( entry ); + submeshGPU.insert( submeshGPU.end(), entries.begin(), entries.end() ); visualSubmeshRanges[visual] = { start, static_cast(entries.size()) }; } From 33ddd5be24630e9233414618e2100c21fe5280f0 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sun, 15 Mar 2026 13:54:18 +0100 Subject: [PATCH 28/42] atlas: only mesh atlas if enabled --- D3D11Engine/D3D11GraphicsEngine.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 39d142ee..13156336 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -3931,7 +3931,9 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh_Indirect( bool noTextures ) { Engine::GAPI->ResetWorldTransform(); // Draw atlas path first (handles opaque + alpha-test submeshes that were atlased) - m_MeshAtlasPass->Draw(); + if ( Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh ) { + m_MeshAtlasPass->Draw(); + } struct MDI_DrawArgs { @@ -4203,7 +4205,9 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh( bool noTextures ) { Engine::GAPI->SetViewTransformXM( view ); Engine::GAPI->ResetWorldTransform(); - m_MeshAtlasPass->Draw(); 
+ if ( Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh ) { + m_MeshAtlasPass->Draw(); + } SetActivePixelShader( PShaderID::PS_Diffuse ); SetActiveVertexShader( VShaderID::VS_Ex ); From 3539892152f17f6aac853624f242a07685989e5a Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sun, 15 Mar 2026 14:41:36 +0100 Subject: [PATCH 29/42] throw out any cached textures after building the texture atlases --- D3D11Engine/D3D11GraphicsEngine.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 13156336..576d8852 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -8141,6 +8141,11 @@ void D3D11GraphicsEngine::OnWorldLoaded() // --- Build world mesh atlas: collect textures, build atlases, merge geometry --- m_MeshAtlasPass->Build(); + + if ( Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh + || Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs ) { + Engine::GAPI->ReloadTextures(); + } } void D3D11GraphicsEngine::StoreVobPreviousTransforms() { From 318dff6269ad1faf30ca58bc441d1e17cceb13e7 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sun, 15 Mar 2026 14:43:06 +0100 Subject: [PATCH 30/42] delete streaming manager code, as streaming only using main thread is way too expensive --- D3D11Engine/D3D11Engine.vcxproj | 2 - D3D11Engine/D3D11GraphicsEngine.h | 1 - .../D3D11StreamingResourcesManager.cpp | 1548 ----------------- D3D11Engine/D3D11StreamingResourcesManager.h | 261 --- 4 files changed, 1812 deletions(-) delete mode 100644 D3D11Engine/D3D11StreamingResourcesManager.cpp delete mode 100644 D3D11Engine/D3D11StreamingResourcesManager.h diff --git a/D3D11Engine/D3D11Engine.vcxproj b/D3D11Engine/D3D11Engine.vcxproj index 323fad8b..ee9c019d 
100644 --- a/D3D11Engine/D3D11Engine.vcxproj +++ b/D3D11Engine/D3D11Engine.vcxproj @@ -855,7 +855,6 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" - @@ -1118,7 +1117,6 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" - diff --git a/D3D11Engine/D3D11GraphicsEngine.h b/D3D11Engine/D3D11GraphicsEngine.h index 12ca687e..fd9be714 100644 --- a/D3D11Engine/D3D11GraphicsEngine.h +++ b/D3D11Engine/D3D11GraphicsEngine.h @@ -6,7 +6,6 @@ #include "D3D11ShaderManager.h" #include "D3D11PipelineStateObject.h" #include "D3D11IndirectBuffer.h" -#include "D3D11StreamingResourcesManager.h" #include "VobCulling.h" #include "D3D11VobAtlasPass.h" #include "D3D11MeshAtlasPass.h" diff --git a/D3D11Engine/D3D11StreamingResourcesManager.cpp b/D3D11Engine/D3D11StreamingResourcesManager.cpp deleted file mode 100644 index 2b8853f2..00000000 --- a/D3D11Engine/D3D11StreamingResourcesManager.cpp +++ /dev/null @@ -1,1548 +0,0 @@ -#include "pch.h" -#include "D3D11StreamingResourcesManager.h" -#include "Logger.h" - -#include -#include -#include - -using Microsoft::WRL::ComPtr; - -// ============================================================================= -// Lifecycle -// ============================================================================= - -D3D11StreamingResourcesManager::~D3D11StreamingResourcesManager() { - Shutdown(); -} - -bool D3D11StreamingResourcesManager::GetIsStreamingSupported( ID3D11Device1* device ) { - if ( !device ) - return false; - - ComPtr device2; - if ( FAILED( device->QueryInterface( IID_PPV_ARGS( &device2 ) ) ) ) - return false; - - D3D11_FEATURE_DATA_D3D11_OPTIONS1 options1 = {}; - if ( FAILED( device2->CheckFeatureSupport( - D3D11_FEATURE_D3D11_OPTIONS1, &options1, sizeof( options1 ) ) ) ) - return false; - - // Tier 1 gives us reserved textures + tile pool + UpdateTileMappings. - // Tier 2 adds clamped LOD feedback (nice but not required). 
- return options1.TiledResourcesTier >= D3D11_TILED_RESOURCES_TIER_2; -} - -bool D3D11StreamingResourcesManager::Init( ID3D11Device1* device, ID3D11DeviceContext1* context ) { - if ( !device || !context ) - return false; - - // QueryInterface up to ID3D11Device2 / ID3D11DeviceContext2 - if ( FAILED( device->QueryInterface( IID_PPV_ARGS( &m_Device2 ) ) ) ) { - LogError() << "[StreamingResources] Failed to QueryInterface ID3D11Device2"; - return false; - } - - if ( FAILED( context->QueryInterface( IID_PPV_ARGS( &m_Context2 ) ) ) ) { - LogError() << "[StreamingResources] Failed to QueryInterface ID3D11DeviceContext2"; - return false; - } - - m_Device = device; - m_Context = context; - m_Initialized = true; - - LogInfo() << "[StreamingResources] Initialized successfully (Tiled Resources supported)"; - return true; -} - -void D3D11StreamingResourcesManager::Shutdown() { - OnWorldUnloaded(); - - m_Context2.Reset(); - m_Device2.Reset(); - m_Context.Reset(); - m_Device.Reset(); - m_Initialized = false; -} - -void D3D11StreamingResourcesManager::OnWorldUnloaded() { - // Clear all tile tracking - m_TileStates.clear(); - while ( !m_LoadQueue.empty() ) m_LoadQueue.pop(); - m_UnloadCandidates.clear(); - m_LoadedSources.clear(); - - // Release tiled atlas textures - m_TiledAtlases.clear(); - - // Release tile pools - m_TilePools.clear(); - m_DefaultTiles.clear(); - - // Release source texture references - m_SourceTextures.clear(); - - // Clear global source offset tracking - m_GlobalSourceOffsets.clear(); - m_TotalSourceCount = 0; - - // Reset staging ring - for ( auto& s : m_StagingRing ) { - s.texture.Reset(); - s.inUse = false; - } - m_StagingRingHead = 0; -} - -// ============================================================================= -// Feedback Query Methods -// ============================================================================= - -UINT D3D11StreamingResourcesManager::GetGlobalSourceOffset( DXGI_FORMAT fmt ) const { - auto it = m_GlobalSourceOffsets.find( 
fmt ); - return ( it != m_GlobalSourceOffsets.end() ) ? it->second : 0; -} - -UINT D3D11StreamingResourcesManager::GetTotalSourceCount() const { - return m_TotalSourceCount; -} - -const std::vector& -D3D11StreamingResourcesManager::GetSourceTextures( DXGI_FORMAT fmt ) const { - auto it = m_SourceTextures.find( fmt ); - if ( it != m_SourceTextures.end() ) - return it->second; - static const std::vector empty; - return empty; -} - -// ============================================================================= -// Key generation -// ============================================================================= - -uint64_t D3D11StreamingResourcesManager::MakeTileKey( - DXGI_FORMAT fmt, UINT subresource, UINT tileX, UINT tileY ) { - // Pack into 64 bits: [fmt:16][subresource:16][tileX:16][tileY:16] - return ( static_cast( fmt ) << 48 ) - | ( static_cast( subresource & 0xFFFF ) << 32 ) - | ( static_cast( tileX & 0xFFFF ) << 16 ) - | ( static_cast( tileY & 0xFFFF ) ); -} - -uint64_t D3D11StreamingResourcesManager::MakeSourceKey( - DXGI_FORMAT fmt, UINT sourceIndex, UINT mip ) { - // Pack into 64 bits: [fmt:16][sourceIndex:32][mip:16] - return ( static_cast( fmt ) << 48 ) - | ( static_cast( sourceIndex ) << 16 ) - | ( static_cast( mip & 0xFFFF ) ); -} - -// ============================================================================= -// Tile Pool Management -// ============================================================================= - -bool D3D11StreamingResourcesManager::CreateTilePool( DXGI_FORMAT fmt, UINT numTiles ) { - D3D11_BUFFER_DESC poolDesc = {}; - poolDesc.ByteWidth = numTiles * TILE_SIZE_BYTES; - poolDesc.Usage = D3D11_USAGE_DEFAULT; - poolDesc.MiscFlags = D3D11_RESOURCE_MISC_TILE_POOL; - - TilePool pool; - pool.totalTiles = numTiles; - pool.usedTiles = 0; - - HRESULT hr = m_Device2->CreateBuffer( &poolDesc, nullptr, pool.buffer.GetAddressOf() ); - if ( FAILED( hr ) ) { - LogError() << "[StreamingResources] Failed to create tile pool for format " - << 
static_cast( fmt ) << " (hr=" << hr << ")"; - return false; - } - - m_TilePools[fmt] = std::move( pool ); - - LogInfo() << "[StreamingResources] Created tile pool: " << numTiles << " tiles (" - << ( numTiles * TILE_SIZE_BYTES / ( 1024 * 1024 ) ) << " MB) for format " - << static_cast( fmt ); - return true; -} - -void D3D11StreamingResourcesManager::GrowTilePool( DXGI_FORMAT fmt, UINT additionalTiles ) { - auto it = m_TilePools.find( fmt ); - if ( it == m_TilePools.end() ) - return; - - TilePool& pool = it->second; - UINT newTotal = pool.totalTiles + additionalTiles; - UINT64 newSizeBytes = static_cast( newTotal ) * TILE_SIZE_BYTES; - - // ID3D11DeviceContext2::ResizeTilePool resizes the pool buffer in-place. - // Existing tile data is preserved; new tiles are appended. - HRESULT hr = m_Context2->ResizeTilePool( pool.buffer.Get(), newSizeBytes ); - if ( FAILED( hr ) ) { - LogWarn() << "[StreamingResources] Failed to grow tile pool for format " - << static_cast( fmt ) << " (hr=" << hr << ")"; - return; - } - - pool.totalTiles = newTotal; - - LogInfo() << "[StreamingResources] Grew tile pool to " << newTotal << " tiles (" - << ( newTotal * TILE_SIZE_BYTES / ( 1024 * 1024 ) ) << " MB) for format " - << static_cast( fmt ); -} - -UINT D3D11StreamingResourcesManager::AllocateTile( DXGI_FORMAT fmt ) { - auto it = m_TilePools.find( fmt ); - if ( it == m_TilePools.end() ) - return UINT_MAX; - - TilePool& pool = it->second; - - // Prefer recycled tiles - if ( !pool.freeTiles.empty() ) { - UINT idx = pool.freeTiles.back(); - pool.freeTiles.pop_back(); - return idx; - } - - // Allocate from high-water mark - if ( pool.usedTiles < pool.totalTiles ) { - return pool.usedTiles++; - } - - // Pool exhausted — grow it - UINT growth = std::max( pool.totalTiles / 2, 64 ); - GrowTilePool( fmt, growth ); - - if ( pool.usedTiles < pool.totalTiles ) { - return pool.usedTiles++; - } - - LogError() << "[StreamingResources] Tile pool exhausted and growth failed for format " - << static_cast( 
fmt ); - return UINT_MAX; -} - -void D3D11StreamingResourcesManager::FreeTile( DXGI_FORMAT fmt, UINT tileIndex ) { - auto it = m_TilePools.find( fmt ); - if ( it == m_TilePools.end() ) - return; - - it->second.freeTiles.push_back( tileIndex ); -} - -// ============================================================================= -// Default Tile -// ============================================================================= - -void D3D11StreamingResourcesManager::FillDefaultTileData( - DXGI_FORMAT fmt, std::vector& outData ) { - outData.resize( TILE_SIZE_BYTES ); - - switch ( fmt ) { - case DXGI_FORMAT_BC1_UNORM: - case DXGI_FORMAT_BC1_UNORM_SRGB: { - // BC1 block: 8 bytes per 4x4 pixel block - // Magenta = (255, 0, 255) encoded as two 16-bit RGB565 endpoints - // RGB565: R=31, G=0, B=31 → 0xF81F - uint8_t block[8] = {}; - uint16_t color = 0xF81F; // magenta in RGB565 - memcpy( &block[0], &color, 2 ); // color0 - memcpy( &block[2], &color, 2 ); // color1 - // Indices: all 0 (use color0) → block[4..7] = 0x00 - block[4] = 0x00; block[5] = 0x00; block[6] = 0x00; block[7] = 0x00; - - for ( UINT i = 0; i < TILE_SIZE_BYTES; i += 8 ) { - memcpy( outData.data() + i, block, 8 ); - } - break; - } - case DXGI_FORMAT_BC2_UNORM: - case DXGI_FORMAT_BC2_UNORM_SRGB: { - // BC2 block: 16 bytes (8 alpha + 8 color) - // Transparent-black: alpha = 0 so DoAlphaTest() clips these pixels, - // preventing magenta seams on alpha-tested geometry (trees, fences). 
- uint8_t block[16] = {}; - // Alpha: all 0x00 (fully transparent, 4 bits per pixel, 16 pixels) - memset( &block[0], 0x00, 8 ); - // Color: black (RGB565 = 0x0000) - uint16_t color = 0x0000; - memcpy( &block[8], &color, 2 ); - memcpy( &block[10], &color, 2 ); - block[12] = 0x00; block[13] = 0x00; block[14] = 0x00; block[15] = 0x00; - - for ( UINT i = 0; i < TILE_SIZE_BYTES; i += 16 ) { - memcpy( outData.data() + i, block, 16 ); - } - break; - } - case DXGI_FORMAT_BC3_UNORM: - case DXGI_FORMAT_BC3_UNORM_SRGB: { - // BC3 block: 16 bytes (8 alpha + 8 color) - // Transparent-black: alpha = 0 so DoAlphaTest() clips these pixels, - // preventing magenta seams on alpha-tested geometry (trees, fences). - uint8_t block[16] = {}; - // Alpha: alpha0=0x00, alpha1=0x00, indices all 0 → all pixels = 0x00 - block[0] = 0x00; // alpha0 - block[1] = 0x00; // alpha1 - // Alpha indices: all 0 → bytes 2..7 = 0 - block[2] = 0x00; block[3] = 0x00; block[4] = 0x00; - block[5] = 0x00; block[6] = 0x00; block[7] = 0x00; - // Color: black (RGB565 = 0x0000) - uint16_t color = 0x0000; - memcpy( &block[8], &color, 2 ); - memcpy( &block[10], &color, 2 ); - block[12] = 0x00; block[13] = 0x00; block[14] = 0x00; block[15] = 0x00; - - for ( UINT i = 0; i < TILE_SIZE_BYTES; i += 16 ) { - memcpy( outData.data() + i, block, 16 ); - } - break; - } - case DXGI_FORMAT_B8G8R8A8_UNORM: - case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: { - // BGRA: B=0xFF, G=0x00, R=0xFF, A=0xFF → magenta - for ( UINT i = 0; i < TILE_SIZE_BYTES; i += 4 ) { - outData[i + 0] = 0xFF; // B - outData[i + 1] = 0x00; // G - outData[i + 2] = 0xFF; // R - outData[i + 3] = 0xFF; // A - } - break; - } - case DXGI_FORMAT_R8G8B8A8_UNORM: - case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: { - // RGBA: R=0xFF, G=0x00, B=0xFF, A=0xFF → magenta - for ( UINT i = 0; i < TILE_SIZE_BYTES; i += 4 ) { - outData[i + 0] = 0xFF; // R - outData[i + 1] = 0x00; // G - outData[i + 2] = 0xFF; // B - outData[i + 3] = 0xFF; // A - } - break; - } - default: - // For unknown 
formats, fill with 0xFF pattern - memset( outData.data(), 0xFF, TILE_SIZE_BYTES ); - break; - } -} - -void D3D11StreamingResourcesManager::InitDefaultTile( DXGI_FORMAT fmt ) { - auto poolIt = m_TilePools.find( fmt ); - if ( poolIt == m_TilePools.end() ) - return; - - // Reserve tile 0 as the default tile - TilePool& pool = poolIt->second; - if ( pool.usedTiles == 0 ) - pool.usedTiles = 1; // tile 0 is reserved - - // Generate magenta fill data - std::vector tileData; - FillDefaultTileData( fmt, tileData ); - - // Upload default tile data to the tile pool at index 0. - // We use UpdateTileMappings to map a temporary tiled texture tile to pool[0], - // then write the data. But since we can't write directly to a tile pool, - // we'll write via the atlas itself after mapping. - // - // For the initial setup, we write the default tile data via a staging texture - // and CopySubresourceRegion to a temporary tile-mapped region. - // Alternatively, we can use ID3D11DeviceContext2::UpdateTiles. - auto atlasIt = m_TiledAtlases.find( fmt ); - if ( atlasIt == m_TiledAtlases.end() ) - return; - - const TiledAtlas& atlas = atlasIt->second; - - // First, map tile (0,0) of the coarsest mip to pool tile 0 - D3D11_TILED_RESOURCE_COORDINATE coord = {}; - coord.Subresource = D3D11CalcSubresource( atlas.mipLevels - 1, 0, atlas.mipLevels ); - coord.X = 0; - coord.Y = 0; - coord.Z = 0; - - D3D11_TILE_REGION_SIZE regionSize = {}; - regionSize.NumTiles = 1; - regionSize.bUseBox = FALSE; - - UINT poolOffset = 0; // tile index 0 - - m_Context2->UpdateTileMappings( - atlas.texture.Get(), - 1, // numRegions - &coord, - ®ionSize, - pool.buffer.Get(), - 1, // numRanges - nullptr, // rangeFlags (nullptr = default = use tile pool offsets) - &poolOffset, - nullptr, // rangeTileCounts (nullptr with single tile) - 0 // flags - ); - - // Now use UpdateTiles to write the data directly to the mapped tile - D3D11_TILED_RESOURCE_COORDINATE updateCoord = coord; - D3D11_TILE_REGION_SIZE updateRegion = 
regionSize; - - m_Context2->UpdateTiles( - atlas.texture.Get(), - &updateCoord, - &updateRegion, - tileData.data(), - 0 // flags - ); - - DefaultTile& dt = m_DefaultTiles[fmt]; - dt.poolIndex = 0; - dt.initialized = true; - - LogInfo() << "[StreamingResources] Default magenta tile initialized for format " - << static_cast( fmt ); -} - -// ============================================================================= -// Tile Mapping -// ============================================================================= - -void D3D11StreamingResourcesManager::MapTileToDefault( - const TiledAtlas& atlas, UINT subresource, - UINT tileX, UINT tileY, DXGI_FORMAT fmt ) { - - auto dtIt = m_DefaultTiles.find( fmt ); - if ( dtIt == m_DefaultTiles.end() || !dtIt->second.initialized ) - return; - - auto poolIt = m_TilePools.find( fmt ); - if ( poolIt == m_TilePools.end() ) - return; - - D3D11_TILED_RESOURCE_COORDINATE coord = {}; - coord.Subresource = subresource; - coord.X = tileX; - coord.Y = tileY; - coord.Z = 0; - - D3D11_TILE_REGION_SIZE regionSize = {}; - regionSize.NumTiles = 1; - regionSize.bUseBox = FALSE; - - UINT poolOffset = dtIt->second.poolIndex; // always tile 0 - - // Map this tile to the shared default tile (many-to-one mapping is allowed) - m_Context2->UpdateTileMappings( - atlas.texture.Get(), - 1, &coord, ®ionSize, - poolIt->second.buffer.Get(), - 1, nullptr, &poolOffset, nullptr, - 0 - ); -} - -void D3D11StreamingResourcesManager::MapTileToPool( - const TiledAtlas& atlas, UINT subresource, - UINT tileX, UINT tileY, UINT poolTileIndex, - DXGI_FORMAT fmt ) { - - auto poolIt = m_TilePools.find( fmt ); - if ( poolIt == m_TilePools.end() ) - return; - - D3D11_TILED_RESOURCE_COORDINATE coord = {}; - coord.Subresource = subresource; - coord.X = tileX; - coord.Y = tileY; - coord.Z = 0; - - D3D11_TILE_REGION_SIZE regionSize = {}; - regionSize.NumTiles = 1; - regionSize.bUseBox = FALSE; - - m_Context2->UpdateTileMappings( - atlas.texture.Get(), - 1, &coord, ®ionSize, - 
poolIt->second.buffer.Get(), - 1, nullptr, &poolTileIndex, nullptr, - 0 - ); -} - -void D3D11StreamingResourcesManager::MapAllTilesToDefault( - const TiledAtlas& atlas, DXGI_FORMAT fmt ) { - - auto dtIt = m_DefaultTiles.find( fmt ); - if ( dtIt == m_DefaultTiles.end() || !dtIt->second.initialized ) - return; - auto poolIt = m_TilePools.find( fmt ); - if ( poolIt == m_TilePools.end() ) - return; - - // Use GetResourceTiling to discover the tile layout - UINT numTilesForResource = 0; - D3D11_PACKED_MIP_DESC packedMipDesc = {}; - D3D11_TILE_SHAPE tileShape = {}; - UINT numSubresourceTilings = atlas.mipLevels * atlas.arraySlices; - std::vector subresourceTilings( numSubresourceTilings ); - - m_Device2->GetResourceTiling( - atlas.texture.Get(), - &numTilesForResource, - &packedMipDesc, - &tileShape, - &numSubresourceTilings, - 0, - subresourceTilings.data() - ); - - // Map all tiles across all subresources to the default tile - UINT defaultPoolOffset = dtIt->second.poolIndex; - - for ( UINT sub = 0; sub < numSubresourceTilings; ++sub ) { - const auto& tiling = subresourceTilings[sub]; - if ( tiling.WidthInTiles == 0 || tiling.HeightInTiles == 0 ) - continue; - - UINT totalTilesInSub = tiling.WidthInTiles * tiling.HeightInTiles; - - // Map the entire subresource to the default tile using a single call - D3D11_TILED_RESOURCE_COORDINATE coord = {}; - coord.Subresource = sub; - coord.X = 0; - coord.Y = 0; - coord.Z = 0; - - D3D11_TILE_REGION_SIZE regionSize = {}; - regionSize.NumTiles = totalTilesInSub; - regionSize.bUseBox = TRUE; - regionSize.Width = tiling.WidthInTiles; - regionSize.Height = tiling.HeightInTiles; - regionSize.Depth = 1; - - // All tiles map to the same default pool tile (reuse mapping) - UINT rangeFlag = D3D11_TILE_RANGE_REUSE_SINGLE_TILE; - UINT rangeCount = totalTilesInSub; - - m_Context2->UpdateTileMappings( - atlas.texture.Get(), - 1, &coord, ®ionSize, - poolIt->second.buffer.Get(), - 1, &rangeFlag, &defaultPoolOffset, &rangeCount, - 0 - ); - - // 
Record tile states as Unmapped (pointing to default) - UINT mip = sub % atlas.mipLevels; - for ( UINT ty = 0; ty < tiling.HeightInTiles; ++ty ) { - for ( UINT tx = 0; tx < tiling.WidthInTiles; ++tx ) { - uint64_t key = MakeTileKey( fmt, sub, tx, ty ); - TileInfo& info = m_TileStates[key]; - info.state = TileState::Unmapped; - info.subresource = sub; - info.tileX = tx; - info.tileY = ty; - info.format = fmt; - info.poolTileIndex = 0; - info.lastUsedTime = 0.0f; - info.priority = 0.0f; - } - } - } - - // Handle packed mips (mips packed into shared tiles at the tail of the resource) - if ( packedMipDesc.NumPackedMips > 0 && packedMipDesc.NumTilesForPackedMips > 0 ) { - // For packed mips, we map the packed tile region for each array slice. - // Each slice has NumTilesForPackedMips consecutive tiles starting at - // packedMipDesc.StartTileIndexInOverallResource (for slice 0). - for ( UINT slice = 0; slice < atlas.arraySlices; ++slice ) { - UINT startTile = packedMipDesc.StartTileIndexInOverallResource - + slice * packedMipDesc.NumTilesForPackedMips; - - // Use NULL coordinates + NULL region to map by absolute tile index - UINT rangeFlag = D3D11_TILE_RANGE_REUSE_SINGLE_TILE; - UINT rangeCount = packedMipDesc.NumTilesForPackedMips; - - m_Context2->UpdateTileMappings( - atlas.texture.Get(), - 1, nullptr, nullptr, // NULL = map by start tile offset - poolIt->second.buffer.Get(), - 1, &rangeFlag, &defaultPoolOffset, &rangeCount, - 0 - ); - } - } - - LogInfo() << "[StreamingResources] Mapped all " << numTilesForResource - << " tiles to default for format " << static_cast( fmt ); -} - -// ============================================================================= -// Subresource Tile Layout -// ============================================================================= - -void D3D11StreamingResourcesManager::GetSubresourceTileCount( - const TiledAtlas& atlas, UINT mipLevel, - UINT& tilesX, UINT& tilesY ) const { - - UINT numSubresourceTilings = atlas.mipLevels * 
atlas.arraySlices; - std::vector tilings( numSubresourceTilings ); - UINT totalTiles = 0; - D3D11_PACKED_MIP_DESC packedDesc = {}; - D3D11_TILE_SHAPE tileShape = {}; - - m_Device2->GetResourceTiling( - atlas.texture.Get(), - &totalTiles, &packedDesc, &tileShape, - &numSubresourceTilings, 0, tilings.data() - ); - - if ( mipLevel < numSubresourceTilings ) { - tilesX = tilings[mipLevel].WidthInTiles; - tilesY = tilings[mipLevel].HeightInTiles; - } else { - tilesX = 0; - tilesY = 0; - } -} - -// ============================================================================= -// Atlas Creation -// ============================================================================= - -TextureManager::AtlasResult D3D11StreamingResourcesManager::CreateStreamingAtlasArray( - std::basic_string_view sourceTextures, - UINT atlasSize, UINT mipLevels ) { - - if ( sourceTextures.empty() || !m_Initialized ) - return {}; - - TextureManager::AtlasResult result; - result.descriptors.resize( sourceTextures.size() ); - - // --- 1. Get format from first texture --- - D3D11_TEXTURE2D_DESC firstDesc; - sourceTextures[0]->GetDesc( &firstDesc ); - DXGI_FORMAT atlasFormat = firstDesc.Format; - - // --- 2. 
Run the same shelf-packing algorithm as TextureManager --- - const UINT blockSize = []( DXGI_FORMAT fmt ) -> UINT { - switch ( fmt ) { - case DXGI_FORMAT_BC1_UNORM: case DXGI_FORMAT_BC1_UNORM_SRGB: - case DXGI_FORMAT_BC2_UNORM: case DXGI_FORMAT_BC2_UNORM_SRGB: - case DXGI_FORMAT_BC3_UNORM: case DXGI_FORMAT_BC3_UNORM_SRGB: - return 4; - default: return 1; - } - }( atlasFormat ); - - const UINT MipAlignment = blockSize * ( 1 << ( mipLevels - 1 ) ); - - struct PackItem { - int originalIndex; - UINT width, height; - UINT x, y, slice; - D3D11_TEXTURE2D_DESC desc; - }; - - std::vector items; - items.reserve( sourceTextures.size() ); - - for ( size_t i = 0; i < sourceTextures.size(); ++i ) { - D3D11_TEXTURE2D_DESC desc; - sourceTextures[i]->GetDesc( &desc ); - items.push_back( { static_cast( i ), desc.Width, desc.Height, 0, 0, 0, desc } ); - } - - // Sort by height descending for shelf packing - std::sort( items.begin(), items.end(), []( const PackItem& a, const PackItem& b ) { - return a.height > b.height; - } ); - - auto Align = []( UINT value, UINT alignment ) -> UINT { - return ( value + alignment - 1 ) & ~( alignment - 1 ); - }; - - // Shelf packing - UINT currentX = 0, currentY = 0, currentShelfHeight = 0, currentSlice = 0; - for ( auto& item : items ) { - UINT alignedW = Align( item.width, MipAlignment ); - UINT alignedH = Align( item.height, MipAlignment ); - - if ( currentX + alignedW > atlasSize ) { - currentX = 0; - currentY += Align( currentShelfHeight, MipAlignment ); - currentShelfHeight = 0; - } - if ( currentY + alignedH > atlasSize ) { - currentSlice++; - currentX = 0; - currentY = 0; - currentShelfHeight = 0; - } - - item.x = currentX; - item.y = currentY; - item.slice = currentSlice; - - currentX += alignedW; - currentShelfHeight = std::max( currentShelfHeight, alignedH ); - } - - UINT totalSlices = currentSlice + 1; - - // --- 2b. 
Clamp mip levels for tiled resource array constraints --- - // On Tier 2 (and Tier 1): when ArraySize > 1, every mip must have dimensions - // >= the standard tile extent. Sub-tile mips ("packed mips") are NOT supported - // for texture arrays. Determine tile dimensions and limit mip count. - if ( totalSlices > 1 ) { - UINT tileW = 128, tileH = 128; // conservative default for 32bpp - switch ( atlasFormat ) { - case DXGI_FORMAT_BC1_UNORM: case DXGI_FORMAT_BC1_UNORM_SRGB: - tileW = 512; tileH = 256; break; // 0.5 bytes/texel - case DXGI_FORMAT_BC2_UNORM: case DXGI_FORMAT_BC2_UNORM_SRGB: - case DXGI_FORMAT_BC3_UNORM: case DXGI_FORMAT_BC3_UNORM_SRGB: - tileW = 256; tileH = 256; break; // 1 byte/texel - case DXGI_FORMAT_R8G8B8A8_UNORM: case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: - case DXGI_FORMAT_B8G8R8A8_UNORM: case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: - tileW = 128; tileH = 128; break; // 4 bytes/texel - default: break; - } - - // Count how many mip levels fit without going below tile extents - UINT maxMips = 0; - for ( UINT m = 0; m < mipLevels; ++m ) { - UINT mipW = std::max( 1u, atlasSize >> m ); - UINT mipH = std::max( 1u, atlasSize >> m ); - if ( mipW < tileW || mipH < tileH ) - break; - maxMips = m + 1; - } - maxMips = std::max( maxMips, 1 ); // at least 1 mip - - if ( maxMips < mipLevels ) { - LogInfo() << "[StreamingResources] Clamping mip levels from " << mipLevels - << " to " << maxMips << " for array size " << totalSlices - << " (tile extent " << tileW << "x" << tileH - << ", format " << static_cast( atlasFormat ) << ")"; - mipLevels = maxMips; - } - } - - // --- 3. 
Create the tiled Texture2DArray --- - D3D11_TEXTURE2D_DESC arrayDesc = {}; - arrayDesc.Width = atlasSize; - arrayDesc.Height = atlasSize; - arrayDesc.MipLevels = mipLevels; - arrayDesc.ArraySize = totalSlices; - arrayDesc.Format = atlasFormat; - arrayDesc.SampleDesc.Count = 1; - arrayDesc.SampleDesc.Quality = 0; - arrayDesc.Usage = D3D11_USAGE_DEFAULT; - arrayDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - arrayDesc.MiscFlags = D3D11_RESOURCE_MISC_TILED; - - ComPtr tiledTexture; - HRESULT hr = m_Device2->CreateTexture2D( &arrayDesc, nullptr, tiledTexture.GetAddressOf() ); - if ( FAILED( hr ) ) { - LogError() << "[StreamingResources] Failed to create tiled Texture2DArray (hr=" << hr << ")"; - return {}; - } - - // --- 4. Create tile pool --- - if ( !CreateTilePool( atlasFormat, INITIAL_POOL_TILES ) ) { - return {}; - } - - // --- 5. Store the atlas --- - TiledAtlas& atlas = m_TiledAtlases[atlasFormat]; - atlas.texture = tiledTexture; - atlas.atlasSize = atlasSize; - atlas.mipLevels = mipLevels; - atlas.arraySlices = totalSlices; - atlas.format = atlasFormat; - - // --- 6. Initialize default tile and map all to it --- - InitDefaultTile( atlasFormat ); - MapAllTilesToDefault( atlas, atlasFormat ); - - // --- 7. Create SRV --- - D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.Format = atlasFormat; - srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; - srvDesc.Texture2DArray.MostDetailedMip = 0; - srvDesc.Texture2DArray.MipLevels = mipLevels; - srvDesc.Texture2DArray.FirstArraySlice = 0; - srvDesc.Texture2DArray.ArraySize = totalSlices; - - ComPtr srv; - hr = m_Device2->CreateShaderResourceView( tiledTexture.Get(), &srvDesc, srv.GetAddressOf() ); - if ( FAILED( hr ) ) { - LogError() << "[StreamingResources] Failed to create SRV for tiled atlas (hr=" << hr << ")"; - return {}; - } - atlas.srv = srv; - - // --- 8. 
Store source texture info for streaming uploads --- - auto& sources = m_SourceTextures[atlasFormat]; - sources.clear(); - sources.reserve( items.size() ); - for ( const auto& item : items ) { - SourceTextureInfo si; - si.texture = sourceTextures[item.originalIndex]; - si.x = item.x; - si.y = item.y; - si.slice = item.slice; - si.width = item.width; - si.height = item.height; - si.sourceMipLevels = item.desc.MipLevels; - sources.push_back( std::move( si ) ); - } - - // --- 8b. Compute global source offsets for feedback texture indexing --- - // Each format's sources get a contiguous range in the flat feedback texture. - // This must be done after all atlas formats have been populated, but since - // CreateStreamingAtlasArray is called once per format, we recompute every time. - m_GlobalSourceOffsets.clear(); - m_TotalSourceCount = 0; - for ( const auto& [fmt, srcVec] : m_SourceTextures ) { - m_GlobalSourceOffsets[fmt] = m_TotalSourceCount; - m_TotalSourceCount += static_cast( srcVec.size() ); - } - - // --- 9. Write descriptors in original input order --- - for ( const auto& item : items ) { - TextureDescriptor& outDesc = result.descriptors[item.originalIndex]; - outDesc.slice = item.slice; - outDesc.uStart = static_cast( item.x ) / atlasSize; - outDesc.vStart = static_cast( item.y ) / atlasSize; - outDesc.uEnd = static_cast( item.x + item.width ) / atlasSize; - outDesc.vEnd = static_cast( item.y + item.height ) / atlasSize; - } - - // --- 10. Hand back raw pointers for AtlasResult (caller manages lifetime) --- - // Note: The tiled texture and SRV are owned by m_TiledAtlases; return raw ptrs - // that the AtlasResult can reference. We override Destroy() behavior by keeping - // our own refs. - result.atlasTextureArray = tiledTexture.Get(); - result.atlasSRV = srv.Get(); - - // AddRef so the raw pointers in AtlasResult remain valid - result.atlasTextureArray->AddRef(); - result.atlasSRV->AddRef(); - - // --- 11. 
Preload coarsest mip levels --- - PreloadCoarseMips( atlasFormat ); - - LogInfo() << "[StreamingResources] Created streaming atlas: " - << atlasSize << "x" << atlasSize << " x " << totalSlices << " slices, " - << mipLevels << " mips, format " << static_cast( atlasFormat ); - - return result; -} - -// ============================================================================= -// Preload Coarse Mips -// ============================================================================= - -void D3D11StreamingResourcesManager::PreloadCoarseMips( DXGI_FORMAT fmt ) { - auto atlasIt = m_TiledAtlases.find( fmt ); - if ( atlasIt == m_TiledAtlases.end() ) - return; - - auto srcIt = m_SourceTextures.find( fmt ); - if ( srcIt == m_SourceTextures.end() ) - return; - - const TiledAtlas& atlas = atlasIt->second; - const auto& sources = srcIt->second; - - // Query the tiling layout once to determine standard vs packed mip levels. - // Packed mips are below the minimum tile dimension and cannot have individual - // tiles mapped in a Texture2DArray — they are already covered by the default - // tile mapping established in MapAllTilesToDefault. - UINT numSubresourceTilings = atlas.mipLevels * atlas.arraySlices; - std::vector tilings( numSubresourceTilings ); - UINT totalTilesInResource = 0; - D3D11_PACKED_MIP_DESC packedDesc = {}; - D3D11_TILE_SHAPE tileShape = {}; - m_Device2->GetResourceTiling( - atlas.texture.Get(), - &totalTilesInResource, &packedDesc, &tileShape, - &numSubresourceTilings, 0, tilings.data() - ); - - // Standard mips: [0, standardMips) support per-tile mappings. - // Packed mips: [standardMips, mipLevels) share a single packed allocation. - UINT standardMips = atlas.mipLevels - packedDesc.NumPackedMips; - if ( standardMips == 0 ) { - LogInfo() << "[StreamingResources] All mips are packed for format " - << static_cast( fmt ) << " — nothing to preload"; - return; - } - - // Preload the PRELOADED_COARSE_MIPS coarsest *standard* mip levels only. 
- // The original code iterated atlas.mipLevels - PRELOADED_COARSE_MIPS which, - // for a 2048px BC atlas (tile=256px), lands on mip 5 (64px) — a packed mip. - // GetSubresourceTileCount then returns (0,0) and every source is skipped, - // leaving all tiles on the default magenta tile with no real data ever loaded. - UINT preloadStart = ( standardMips > PRELOADED_COARSE_MIPS ) - ? standardMips - PRELOADED_COARSE_MIPS - : 0; - - UINT tilesUploaded = 0; - - for ( UINT mip = preloadStart; mip < standardMips; ++mip ) { - for ( UINT srcIdx = 0; srcIdx < static_cast( sources.size() ); ++srcIdx ) { - const auto& src = sources[srcIdx]; - - // Missing mips are handled by GenerateMissingMips below — skip here - if ( mip >= src.sourceMipLevels ) - continue; - - UINT dstSub = D3D11CalcSubresource( mip, src.slice, atlas.mipLevels ); - if ( dstSub >= numSubresourceTilings ) - continue; - - const auto& tiling = tilings[dstSub]; - if ( tiling.WidthInTiles == 0 || tiling.HeightInTiles == 0 ) - continue; // Packed or degenerate — skip - - // Compute which tiles in this subresource are touched by the source - // texture's region. Non-uniform sources may cover only a sub-rect of - // the atlas subresource, so we must not map tiles outside that rect. - UINT regionX = src.x >> mip; - UINT regionY = src.y >> mip; - UINT regionW = std::max( 1u, src.width >> mip ); - UINT regionH = std::max( 1u, src.height >> mip ); - - UINT tileW = tileShape.WidthInTexels; - UINT tileH = tileShape.HeightInTexels; - - UINT tileStartX = regionX / tileW; - UINT tileStartY = regionY / tileH; - UINT tileEndX = std::min( tiling.WidthInTiles - 1, ( regionX + regionW - 1 ) / tileW ); - UINT tileEndY = std::min( tiling.HeightInTiles - 1, ( regionY + regionH - 1 ) / tileH ); - - // Ensure all tiles covering this source region are mapped to real pool - // tiles. Mapping MUST happen before CopySubresourceRegion, otherwise - // the GPU silently discards writes to unmapped (default) tiles. 
- for ( UINT ty = tileStartY; ty <= tileEndY; ++ty ) { - for ( UINT tx = tileStartX; tx <= tileEndX; ++tx ) { - uint64_t tileKey = MakeTileKey( fmt, dstSub, tx, ty ); - auto stateIt = m_TileStates.find( tileKey ); - if ( stateIt != m_TileStates.end() && - stateIt->second.state == TileState::Resident ) - continue; // Shared tile already resident from an earlier source - - UINT poolTile = AllocateTile( fmt ); - if ( poolTile == UINT_MAX ) - continue; // Pool exhausted — leave on default tile - - MapTileToPool( atlas, dstSub, tx, ty, poolTile, fmt ); - - TileInfo info; - info.state = TileState::Resident; - info.poolTileIndex = poolTile; - info.subresource = dstSub; - info.tileX = tx; - info.tileY = ty; - info.format = fmt; - info.lastUsedTime = 0.0f; - m_TileStates[tileKey] = info; - ++tilesUploaded; - } - } - - // Upload this source's pixel data. Always upload — even when all - // tiles were already Resident from an earlier source. Multiple - // sources share the same 64KB tile but occupy different (x,y) regions - // within it. Gating on "anyNewTiles" caused the second source's - // region to never be written, leaving it as uninitialized pool memory - // (black) or transparent (invisible alpha-tested geometry). - UploadTileData( atlas, dstSub, src, mip ); - - // Track as loaded so UpdateStreaming won't re-upload and eviction can - // properly invalidate this source-mip if the tile is reclaimed. - uint64_t srcKey = MakeSourceKey( fmt, srcIdx, mip ); - m_LoadedSources.insert( srcKey ); - } - } - - // Generate missing mip levels for sources that have fewer mips than the atlas. - // This is called once per source at atlas creation time; the generated data is - // uploaded immediately so coarse mips are always available. 
- for ( UINT srcIdx = 0; srcIdx < static_cast( sources.size() ); ++srcIdx ) { - const auto& src = sources[srcIdx]; - if ( src.sourceMipLevels < atlas.mipLevels ) { - GenerateMissingMips( atlas, src, fmt, srcIdx, - tilings, numSubresourceTilings, tileShape ); - } - } - - LogInfo() << "[StreamingResources] Preloaded " << PRELOADED_COARSE_MIPS - << " coarsest standard mip(s) (" << tilesUploaded << " tiles) for format " - << static_cast( fmt ); -} - -// ============================================================================= -// Tile Data Upload -// ============================================================================= - -void D3D11StreamingResourcesManager::UploadTileData( - const TiledAtlas& atlas, UINT subresource, - const SourceTextureInfo& src, UINT srcMip ) { - - if ( srcMip >= src.sourceMipLevels ) - return; // Source texture doesn't have this mip level - - UINT srcSub = D3D11CalcSubresource( srcMip, 0, src.sourceMipLevels ); - - // Destination offset within the atlas subresource at this mip level - UINT mipX = src.x >> srcMip; - UINT mipY = src.y >> srcMip; - - // Source region dimensions at this mip level. - // Using an explicit D3D11_BOX ensures non-uniform texture sizes are handled - // correctly — for packed atlas contents src.width/height may differ per entry, - // and a nullptr pSrcBox would copy the full source extent to an unintended region. - UINT mipW = std::max( 1u, src.width >> srcMip ); - UINT mipH = std::max( 1u, src.height >> srcMip ); - - D3D11_BOX srcBox = {}; - srcBox.left = 0; - srcBox.top = 0; - srcBox.front = 0; - srcBox.right = mipW; - srcBox.bottom = mipH; - srcBox.back = 1; - - // All tiles covering (mipX, mipY, mipW, mipH) must already be mapped to real - // pool tiles before this call — writes to unmapped tiles are silently discarded. 
- m_Context->CopySubresourceRegion( - atlas.texture.Get(), subresource, - mipX, mipY, 0, - src.texture.Get(), srcSub, - &srcBox - ); -} - -// ============================================================================= -// Missing Mip Generation -// ============================================================================= - -void D3D11StreamingResourcesManager::GenerateMissingMips( - const TiledAtlas& atlas, const SourceTextureInfo& src, - DXGI_FORMAT fmt, UINT srcIndex, - const std::vector& tilings, - UINT numSubresourceTilings, - const D3D11_TILE_SHAPE& tileShape ) { - - if ( src.sourceMipLevels >= atlas.mipLevels ) - return; // No missing mips - - // Capture the source texture to CPU memory (creates an internal staging copy) - DirectX::ScratchImage captured; - if ( FAILED( DirectX::CaptureTexture( m_Device.Get(), m_Context.Get(), src.texture.Get(), captured ) ) ) { - LogWarn() << "[StreamingResources] CaptureTexture failed for source " << srcIndex; - return; - } - - // Grab the last available mip as the downsampling base - const DirectX::Image* lastMipImg = captured.GetImage( src.sourceMipLevels - 1, 0, 0 ); - if ( !lastMipImg ) return; - - // GenerateMipMaps requires uncompressed input — decompress BC textures first - DirectX::ScratchImage decompressed; - const DirectX::Image* baseImg = lastMipImg; - if ( DirectX::IsCompressed( lastMipImg->format ) ) { - if ( FAILED( DirectX::Decompress( *lastMipImg, DXGI_FORMAT_R8G8B8A8_UNORM, decompressed ) ) ) { - LogWarn() << "[StreamingResources] Decompress failed for source " << srcIndex; - return; - } - baseImg = decompressed.GetImage( 0, 0, 0 ); - } - - // Generate: level 0 = base (already copied to atlas), levels 1..N = the missing mips - UINT levelsToGen = atlas.mipLevels - src.sourceMipLevels + 1; - DirectX::ScratchImage mipChain; - if ( FAILED( DirectX::GenerateMipMaps( *baseImg, DirectX::TEX_FILTER_BOX, levelsToGen, mipChain ) ) ) { - LogWarn() << "[StreamingResources] GenerateMipMaps failed for source " << 
srcIndex; - return; - } - - // Re-compress the generated levels back to the atlas BC format. - // Try GPU-accelerated compression first; fall back to CPU if unsupported. - const DirectX::ScratchImage* finalChain = &mipChain; - DirectX::ScratchImage recompressed; - if ( DirectX::IsCompressed( fmt ) ) { - HRESULT hr = DirectX::Compress( m_Device.Get(), - mipChain.GetImages(), mipChain.GetImageCount(), mipChain.GetMetadata(), - fmt, DirectX::TEX_COMPRESS_DEFAULT, DirectX::TEX_ALPHA_WEIGHT_DEFAULT, - recompressed ); - if ( FAILED( hr ) ) { - // GPU BC compression not supported — use CPU path - recompressed = DirectX::ScratchImage{}; - if ( FAILED( DirectX::Compress( - mipChain.GetImages(), mipChain.GetImageCount(), mipChain.GetMetadata(), - fmt, DirectX::TEX_COMPRESS_DEFAULT, DirectX::TEX_ALPHA_WEIGHT_DEFAULT, - recompressed ) ) ) { - LogWarn() << "[StreamingResources] Compress failed for source " << srcIndex; - return; - } - } - finalChain = &recompressed; - } - - // Upload each generated mip level - for ( UINT mip = src.sourceMipLevels; mip < atlas.mipLevels; ++mip ) { - // chainIdx 0 = the base (already in atlas), so generated levels start at 1 - UINT chainIdx = mip - src.sourceMipLevels + 1; - const DirectX::Image* genImg = finalChain->GetImage( chainIdx, 0, 0 ); - if ( !genImg || !genImg->pixels ) continue; - - UINT dstSub = D3D11CalcSubresource( mip, src.slice, atlas.mipLevels ); - if ( dstSub >= numSubresourceTilings ) - continue; - - // Map tiles before uploading - const auto& tiling = tilings[dstSub]; - if ( tiling.WidthInTiles > 0 && tiling.HeightInTiles > 0 ) { - UINT regionX = src.x >> mip; - UINT regionY = src.y >> mip; - UINT regionW = std::max( 1u, src.width >> mip ); - UINT regionH = std::max( 1u, src.height >> mip ); - - UINT tw = tileShape.WidthInTexels; - UINT th = tileShape.HeightInTexels; - - UINT tileStartX = regionX / tw; - UINT tileStartY = regionY / th; - UINT tileEndX = std::min( tiling.WidthInTiles - 1, ( regionX + regionW - 1 ) / tw ); - 
UINT tileEndY = std::min( tiling.HeightInTiles - 1, ( regionY + regionH - 1 ) / th ); - - for ( UINT ty = tileStartY; ty <= tileEndY; ++ty ) { - for ( UINT tx = tileStartX; tx <= tileEndX; ++tx ) { - uint64_t tileKey = MakeTileKey( fmt, dstSub, tx, ty ); - auto stateIt = m_TileStates.find( tileKey ); - if ( stateIt != m_TileStates.end() && - stateIt->second.state == TileState::Resident ) - continue; - - UINT poolTile = AllocateTile( fmt ); - if ( poolTile == UINT_MAX ) - continue; - - MapTileToPool( atlas, dstSub, tx, ty, poolTile, fmt ); - - TileInfo info; - info.state = TileState::Resident; - info.poolTileIndex = poolTile; - info.subresource = dstSub; - info.tileX = tx; - info.tileY = ty; - info.format = fmt; - info.lastUsedTime = 0.0f; - m_TileStates[tileKey] = info; - } - } - } - - // Create temporary immutable texture and copy to atlas - D3D11_TEXTURE2D_DESC tmpDesc = {}; - tmpDesc.Width = static_cast( genImg->width ); - tmpDesc.Height = static_cast( genImg->height ); - tmpDesc.MipLevels = 1; - tmpDesc.ArraySize = 1; - tmpDesc.Format = genImg->format; - tmpDesc.SampleDesc.Count = 1; - tmpDesc.Usage = D3D11_USAGE_IMMUTABLE; - tmpDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - - D3D11_SUBRESOURCE_DATA initData = {}; - initData.pSysMem = genImg->pixels; - initData.SysMemPitch = static_cast( genImg->rowPitch ); - - ComPtr tmpTex; - if ( SUCCEEDED( m_Device->CreateTexture2D( &tmpDesc, &initData, tmpTex.GetAddressOf() ) ) ) { - UINT mipX = src.x >> mip; - UINT mipY = src.y >> mip; - D3D11_BOX box = { 0, 0, 0, - static_cast( genImg->width ), - static_cast( genImg->height ), 1 }; - m_Context->CopySubresourceRegion( - atlas.texture.Get(), dstSub, mipX, mipY, 0, - tmpTex.Get(), 0, &box ); - } - - // Mark as loaded - uint64_t srcKey = MakeSourceKey( fmt, srcIndex, mip ); - m_LoadedSources.insert( srcKey ); - } - - LogInfo() << "[StreamingResources] Generated " << ( atlas.mipLevels - src.sourceMipLevels ) - << " missing mip(s) for source " << srcIndex - << " (had " << 
src.sourceMipLevels << ", atlas needs " << atlas.mipLevels << ")"; -} - -// ============================================================================= -// Per-Frame Streaming Update -// ============================================================================= - -void D3D11StreamingResourcesManager::UpdateStreaming( - const DirectX::XMFLOAT3& cameraPosition, float drawDistance, float currentTime, - const std::unordered_set* requestedSources ) { - - if ( !m_Initialized ) - return; - - // ------------------------------------------------------------------------- - // The streaming unit is a (source, mip) pair — NOT individual tiles. - // - // Multiple non-uniform source textures share 64KB atlas tiles. The old - // tile-level approach allocated a fresh pool tile per-source-per-tile, - // causing the same atlas tile to be re-mapped to different pool memory - // each frame (frame-flickering) and leaving newly-allocated tiles filled - // with uninitialized zeros (black). - // - // The correct flow is: - // 1. Decide which (source, mip) pairs need loading. - // 2. For each, ensure ALL atlas tiles that the source covers are mapped - // to real pool tiles (allocate only if currently default-mapped). - // 3. Upload the source mip data via CopySubresourceRegion ONCE. - // 4. Record the (source, mip) as loaded so we never re-upload it. - // ------------------------------------------------------------------------- - - // --- 1. Build fresh load queue --- - // Clear any stale requests from previous frames. The priority_queue has no - // clear(), so swap with an empty one. 
- { decltype( m_LoadQueue ) empty; m_LoadQueue.swap( empty ); } - - for ( auto& [fmt, sources] : m_SourceTextures ) { - auto atlasIt = m_TiledAtlases.find( fmt ); - if ( atlasIt == m_TiledAtlases.end() ) - continue; - - const TiledAtlas& atlas = atlasIt->second; - - // Query tile layout once per atlas - UINT numSubresourceTilings = atlas.mipLevels * atlas.arraySlices; - D3D11_PACKED_MIP_DESC packedDesc = {}; - D3D11_TILE_SHAPE tileShape = {}; - UINT totalTiles = 0; - { - std::vector tmp( numSubresourceTilings ); - m_Device2->GetResourceTiling( - atlas.texture.Get(), - &totalTiles, &packedDesc, &tileShape, - &numSubresourceTilings, 0, tmp.data() - ); - } - - UINT standardMips = atlas.mipLevels - packedDesc.NumPackedMips; - - // Determine the preloaded mip range so we skip those - UINT preloadStart = ( standardMips > PRELOADED_COARSE_MIPS ) - ? standardMips - PRELOADED_COARSE_MIPS - : 0; - - for ( UINT srcIdx = 0; srcIdx < static_cast( sources.size() ); ++srcIdx ) { - const auto& src = sources[srcIdx]; - - // Feedback-driven filtering: if requestedSources is provided, only stream - // sources that the GPU reported as needing data. Otherwise load everything. - if ( requestedSources ) { - UINT globalIdx = m_GlobalSourceOffsets[fmt] + srcIdx; - if ( requestedSources->count( globalIdx ) == 0 ) - continue; // Not requested by GPU feedback — skip - } - - for ( UINT mip = 0; mip < standardMips; ++mip ) { - // Skip mips that were preloaded at atlas creation - if ( mip >= preloadStart ) - continue; - - // Skip if the source doesn't have this mip — generated mips - // are handled by GenerateMissingMips during PreloadCoarseMips. - // For mips that were already generated, m_LoadedSources will - // short-circuit below. 
- if ( mip >= src.sourceMipLevels ) - continue; - - // Skip if already uploaded - uint64_t srcKey = MakeSourceKey( fmt, srcIdx, mip ); - if ( m_LoadedSources.count( srcKey ) ) - continue; - - // Priority: coarser mips first (higher priority number) - float priority = static_cast( atlas.mipLevels - mip ); - - StreamingRequest req; - req.sourceIndex = srcIdx; - req.priority = priority; - req.mipLevel = mip; - req.format = fmt; - m_LoadQueue.push( req ); - } - } - } - - // --- 2. Process load queue (no per-frame cap) --- - // All visible source-mips are loaded immediately. A per-frame budget previously - // caused multi-frame pop-in and invisible alpha-tested geometry (BC2/BC3 default - // tiles are transparent). The cost is bounded by the number of newly-visible - // sources, which is typically small after the initial load. - while ( !m_LoadQueue.empty() ) { - StreamingRequest req = m_LoadQueue.top(); - m_LoadQueue.pop(); - - // Double-check: may have been loaded by a higher-priority path - uint64_t srcKey = MakeSourceKey( req.format, req.sourceIndex, req.mipLevel ); - if ( m_LoadedSources.count( srcKey ) ) - continue; - - auto atlasIt = m_TiledAtlases.find( req.format ); - if ( atlasIt == m_TiledAtlases.end() ) - continue; - - auto srcIt = m_SourceTextures.find( req.format ); - if ( srcIt == m_SourceTextures.end() || req.sourceIndex >= srcIt->second.size() ) - continue; - - const TiledAtlas& atlas = atlasIt->second; - const SourceTextureInfo& src = srcIt->second[req.sourceIndex]; - - UINT dstSub = D3D11CalcSubresource( req.mipLevel, src.slice, atlas.mipLevels ); - - // Query tiling for tile shape - UINT numSubresourceTilings = atlas.mipLevels * atlas.arraySlices; - std::vector tilings( numSubresourceTilings ); - UINT totalTiles = 0; - D3D11_PACKED_MIP_DESC packedDesc = {}; - D3D11_TILE_SHAPE tileShape = {}; - m_Device2->GetResourceTiling( - atlas.texture.Get(), - &totalTiles, &packedDesc, &tileShape, - &numSubresourceTilings, 0, tilings.data() - ); - - if ( dstSub 
>= numSubresourceTilings ) - continue; - - const auto& tiling = tilings[dstSub]; - if ( tiling.WidthInTiles == 0 || tiling.HeightInTiles == 0 ) - continue; - - // Compute tile range this source covers at this mip - UINT regionX = src.x >> req.mipLevel; - UINT regionY = src.y >> req.mipLevel; - UINT regionW = std::max( 1u, src.width >> req.mipLevel ); - UINT regionH = std::max( 1u, src.height >> req.mipLevel ); - UINT tileW = tileShape.WidthInTexels; - UINT tileH = tileShape.HeightInTexels; - - UINT tileStartX = regionX / tileW; - UINT tileStartY = regionY / tileH; - UINT tileEndX = std::min( tiling.WidthInTiles - 1, ( regionX + regionW - 1 ) / tileW ); - UINT tileEndY = std::min( tiling.HeightInTiles - 1, ( regionY + regionH - 1 ) / tileH ); - - // Ensure all covered tiles are mapped to real pool tiles. - // Tiles may already be resident from another source that shares them — skip those. - for ( UINT ty = tileStartY; ty <= tileEndY; ++ty ) { - for ( UINT tx = tileStartX; tx <= tileEndX; ++tx ) { - uint64_t tileKey = MakeTileKey( req.format, dstSub, tx, ty ); - auto stateIt = m_TileStates.find( tileKey ); - if ( stateIt != m_TileStates.end() && - stateIt->second.state == TileState::Resident ) { - // Tile already resident — still record our source key so eviction - // of this shared tile invalidates all dependent sources. 
- stateIt->second.sourceKeys.push_back( srcKey ); - continue; - } - - UINT poolTile = AllocateTile( req.format ); - if ( poolTile == UINT_MAX ) - continue; // Pool exhausted — leave on default - - MapTileToPool( atlas, dstSub, tx, ty, poolTile, req.format ); - - TileInfo info; - info.state = TileState::Resident; - info.poolTileIndex = poolTile; - info.subresource = dstSub; - info.tileX = tx; - info.tileY = ty; - info.format = req.format; - info.lastUsedTime = currentTime; - info.sourceKeys.push_back( srcKey ); - m_TileStates[tileKey] = info; - } - } - - // Upload source data once (all tiles are now mapped) - UploadTileData( atlas, dstSub, src, req.mipLevel ); - - m_LoadedSources.insert( srcKey ); - - // Touch all tiles this source covers so they won't be unloaded - for ( UINT ty = tileStartY; ty <= tileEndY; ++ty ) { - for ( UINT tx = tileStartX; tx <= tileEndX; ++tx ) { - uint64_t tileKey = MakeTileKey( req.format, dstSub, tx, ty ); - auto stateIt = m_TileStates.find( tileKey ); - if ( stateIt != m_TileStates.end() ) { - stateIt->second.lastUsedTime = currentTime; - } - } - } - - } - - // --- 3. Touch resident tiles based on feedback --- - // When feedback-driven: only touch tiles belonging to requested sources. - // When no feedback (requestedSources == nullptr): touch everything (legacy behavior). 
- if ( requestedSources ) { - // Build a set of (format, srcIdx) pairs that are requested - for ( auto& [fmt, sources] : m_SourceTextures ) { - auto atlasIt = m_TiledAtlases.find( fmt ); - if ( atlasIt == m_TiledAtlases.end() ) - continue; - - const TiledAtlas& atlas = atlasIt->second; - - for ( UINT srcIdx = 0; srcIdx < static_cast( sources.size() ); ++srcIdx ) { - UINT globalIdx = m_GlobalSourceOffsets[fmt] + srcIdx; - if ( requestedSources->count( globalIdx ) == 0 ) - continue; // Not visible — don't touch, let it age out - - const auto& src = sources[srcIdx]; - - // Touch all tiles this source covers across all loaded mips - UINT numSub = atlas.mipLevels * atlas.arraySlices; - std::vector tilings( numSub ); - UINT tt = 0; - D3D11_PACKED_MIP_DESC pd = {}; - D3D11_TILE_SHAPE ts = {}; - m_Device2->GetResourceTiling( - atlas.texture.Get(), - &tt, &pd, &ts, &numSub, 0, tilings.data() ); - - UINT standardMips = atlas.mipLevels - pd.NumPackedMips; - for ( UINT mip = 0; mip < standardMips; ++mip ) { - UINT dstSub = D3D11CalcSubresource( mip, src.slice, atlas.mipLevels ); - if ( dstSub >= numSub ) continue; - - UINT regionX = src.x >> mip; - UINT regionY = src.y >> mip; - UINT regionW = std::max( 1u, src.width >> mip ); - UINT regionH = std::max( 1u, src.height >> mip ); - UINT tileW = ts.WidthInTexels; - UINT tileH = ts.HeightInTexels; - if ( tileW == 0 || tileH == 0 ) continue; - - UINT tileStartX = regionX / tileW; - UINT tileStartY = regionY / tileH; - UINT tileEndX = std::min( tilings[dstSub].WidthInTiles > 0 ? tilings[dstSub].WidthInTiles - 1 : 0, - ( regionX + regionW - 1 ) / tileW ); - UINT tileEndY = std::min( tilings[dstSub].HeightInTiles > 0 ? 
tilings[dstSub].HeightInTiles - 1 : 0, - ( regionY + regionH - 1 ) / tileH ); - - for ( UINT ty = tileStartY; ty <= tileEndY; ++ty ) { - for ( UINT tx = tileStartX; tx <= tileEndX; ++tx ) { - uint64_t tileKey = MakeTileKey( fmt, dstSub, tx, ty ); - auto stateIt = m_TileStates.find( tileKey ); - if ( stateIt != m_TileStates.end() && stateIt->second.state == TileState::Resident ) { - stateIt->second.lastUsedTime = currentTime; - } - } - } - } - } - } - } else { - // No feedback — touch everything (backward compatibility) - for ( auto& [key, tile] : m_TileStates ) { - if ( tile.state == TileState::Resident ) { - tile.lastUsedTime = currentTime; - } - } - } - - // --- 4. Identify unload candidates --- - // (Currently all resident tiles are touched every frame above, so nothing - // will be unloaded. This section is kept for future distance-based eviction - // where step 3 would only touch tiles near the camera.) - m_UnloadCandidates.clear(); - for ( auto& [key, tile] : m_TileStates ) { - if ( tile.state == TileState::Resident && - ( currentTime - tile.lastUsedTime ) > UNLOAD_COOLDOWN_SECONDS ) { - - // Don't unload preloaded coarse mips - auto atlasIt = m_TiledAtlases.find( tile.format ); - if ( atlasIt != m_TiledAtlases.end() ) { - UINT mipLevels = atlasIt->second.mipLevels; - UINT mip = tile.subresource % mipLevels; - - // Query tile layout once per atlas - UINT numSubresourceTilings = atlasIt->second.mipLevels * atlasIt->second.arraySlices; - D3D11_PACKED_MIP_DESC packedDesc = {}; - D3D11_TILE_SHAPE tileShape = {}; - UINT totalTiles = 0; - { - std::vector tmp( numSubresourceTilings ); - m_Device2->GetResourceTiling( - atlasIt->second.texture.Get(), - &totalTiles, &packedDesc, &tileShape, - &numSubresourceTilings, 0, tmp.data() - ); - } - - UINT standardMips = mipLevels - packedDesc.NumPackedMips; - UINT preloadStart = ( standardMips > PRELOADED_COARSE_MIPS ) - ? 
standardMips - PRELOADED_COARSE_MIPS : 0; - if ( mip >= preloadStart ) - continue; // Don't unload preloaded coarse mips - } - - tile.state = TileState::PendingUnload; - m_UnloadCandidates.push_back( key ); - } - } - - // --- 5. Process unload queue (frame-budgeted) --- - UINT unmapsThisFrame = 0; - for ( auto key : m_UnloadCandidates ) { - if ( unmapsThisFrame >= MAX_TILE_UNMAPS_PER_FRAME ) - break; - - auto stateIt = m_TileStates.find( key ); - if ( stateIt == m_TileStates.end() ) - continue; - - TileInfo& tile = stateIt->second; - if ( tile.state != TileState::PendingUnload ) - continue; - - auto atlasIt = m_TiledAtlases.find( tile.format ); - if ( atlasIt == m_TiledAtlases.end() ) - continue; - - // Remap to default tile - MapTileToDefault( atlasIt->second, tile.subresource, - tile.tileX, tile.tileY, tile.format ); - - // Free the pool tile - FreeTile( tile.format, tile.poolTileIndex ); - - tile.state = TileState::Unmapped; - tile.poolTileIndex = 0; - unmapsThisFrame++; - - // Remove m_LoadedSources entries that depended on this tile so the - // source will be re-uploaded if it becomes visible again. - for ( uint64_t sk : tile.sourceKeys ) { - m_LoadedSources.erase( sk ); - } - tile.sourceKeys.clear(); - } -} diff --git a/D3D11Engine/D3D11StreamingResourcesManager.h b/D3D11Engine/D3D11StreamingResourcesManager.h deleted file mode 100644 index f307011d..00000000 --- a/D3D11Engine/D3D11StreamingResourcesManager.h +++ /dev/null @@ -1,261 +0,0 @@ -#pragma once - -#include "pch.h" -#include -#include -#include -#include -#include -#include -#include "D3D11TextureAtlasManager.h" - -// Forward declarations -class zCTexture; - -/** - * Streaming resources manager using D3D11 Tiled Resources (Reserved Resources). - * - * Creates Texture2DArray atlases backed by tile pools instead of fully-committed - * GPU memory. Tiles are streamed in/out based on camera proximity and screen-space - * priority, keeping memory footprint bounded even with large texture sets. 
- * - * All unmapped tiles point to a single default tile (magenta debug fill), so the - * SRV is always valid and shaders never sample garbage data. - * - * Coarsest mip levels are preloaded immediately so objects always display at least - * a low-resolution texture. - */ -class D3D11StreamingResourcesManager { -public: - D3D11StreamingResourcesManager() = default; - ~D3D11StreamingResourcesManager(); - - // Public struct: source texture info (needed by graphics engine for feedback lookup) - struct SourceTextureInfo { - Microsoft::WRL::ComPtr texture; - UINT x, y, slice; - UINT width, height; - UINT sourceMipLevels; - }; - - // --- Capability query (static, can be called before Init) --- - static bool GetIsStreamingSupported( ID3D11Device1* device ); - - // --- Lifecycle --- - bool Init( ID3D11Device1* device, ID3D11DeviceContext1* context ); - void Shutdown(); - - // --- Atlas creation --- - // Creates a tiled Texture2DArray with the same bin-packing layout as - // TextureManager::CreateAtlasArray, but backed by tile pools. - // Returns a compatible AtlasResult so the rest of the pipeline is unchanged. - TextureManager::AtlasResult CreateStreamingAtlasArray( - std::basic_string_view sourceTextures, - UINT atlasSize = 2048, UINT mipLevels = 6 ); - - // --- Per-frame streaming update --- - // Called once per frame before draw calls. Evaluates tile priorities, - // streams in/out tiles within the per-frame budget. - // If requestedSources is non-null, only sources in the set are loaded (feedback-driven). - // If null, all sources are loaded (backward compatibility / non-streaming fallback). - void UpdateStreaming( const DirectX::XMFLOAT3& cameraPosition, float drawDistance, float currentTime, - const std::unordered_set* requestedSources = nullptr ); - - // --- Feedback query methods --- - // Global source offset for a given format (cumulative count of sources in prior formats). 
- UINT GetGlobalSourceOffset( DXGI_FORMAT fmt ) const; - // Total number of sources across all formats. - UINT GetTotalSourceCount() const; - - // Source texture list for a given format (for populating SubmeshGPUData.globalSourceIndex). - const std::vector& GetSourceTextures( DXGI_FORMAT fmt ) const; - - // --- World lifecycle --- - void OnWorldUnloaded(); - -private: - // --- Device interfaces (QueryInterface'd from ID3D11Device1/Context1) --- - Microsoft::WRL::ComPtr m_Device2; - Microsoft::WRL::ComPtr m_Context2; - - // Keep a ref to ID3D11Device for non-tiled operations (staging textures etc.) - Microsoft::WRL::ComPtr m_Device; - Microsoft::WRL::ComPtr m_Context; - - bool m_Initialized = false; - - // ========================================================================= - // Tile Pool - // ========================================================================= - // One tile pool per DXGI_FORMAT group (mirrors the atlas grouping). - struct TilePool { - Microsoft::WRL::ComPtr buffer; // D3D11_RESOURCE_MISC_TILE_POOL - UINT totalTiles = 0; // capacity in 64KB tiles - UINT usedTiles = 0; // high-water allocation mark - std::vector freeTiles; // recycled tile indices (LIFO stack) - }; - std::unordered_map m_TilePools; - - // ========================================================================= - // Tiled Atlas Textures - // ========================================================================= - struct TiledAtlas { - Microsoft::WRL::ComPtr texture; // D3D11_RESOURCE_MISC_TILED - Microsoft::WRL::ComPtr srv; - UINT atlasSize = 0; - UINT mipLevels = 0; - UINT arraySlices = 0; - DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; - }; - std::unordered_map m_TiledAtlases; - - // ========================================================================= - // Default (null) Tile - // ========================================================================= - // A single 64KB tile filled with magenta (1,0,1,1), mapped to all unmapped - // tile regions so shaders always 
read valid data. - struct DefaultTile { - UINT poolIndex = 0; - bool initialized = false; - }; - std::unordered_map m_DefaultTiles; - - // ========================================================================= - // Tile State Tracking - // ========================================================================= - enum class TileState : uint8_t { - Unmapped, // mapped to default magenta tile - PendingLoad, // queued for streaming in - Resident, // fully loaded with real data - PendingUnload // cooldown timer running before unmap - }; - - struct TileInfo { - UINT poolTileIndex = 0; - TileState state = TileState::Unmapped; - float lastUsedTime = 0.0f; - float priority = 0.0f; - UINT subresource = 0; - UINT tileX = 0; - UINT tileY = 0; - DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; - // Source-mip keys that depend on this tile. When evicted, these are - // erased from m_LoadedSources so re-streaming can happen. - std::vector sourceKeys; - }; - // key = MakeTileKey(format, subresource, tileX, tileY) - std::unordered_map m_TileStates; - - // ========================================================================= - // Streaming Request Queue - // ========================================================================= - // Streaming operates at the source+mip granularity, NOT individual tiles. - // Multiple non-uniform sources may share the same atlas tile; tile-level - // streaming caused tiles to be re-mapped and overwritten each frame as - // different sources "claimed" the shared tile, producing frame-flickering - // and eventual black (uninitialized pool memory). 
- struct StreamingRequest { - UINT sourceIndex = 0; // index into m_SourceTextures[format] - float priority = 0.0f; - UINT mipLevel = 0; - DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; - - bool operator<( const StreamingRequest& other ) const { - return priority < other.priority; // max-heap: highest priority first - } - }; - std::priority_queue m_LoadQueue; - std::vector m_UnloadCandidates; - - // Tracks which source+mip combos have been fully uploaded. - // Key = MakeSourceKey(format, sourceIndex, mip) - std::unordered_set m_LoadedSources; - - // ========================================================================= - // Source Texture References (for uploading tile data) - // ========================================================================= - // Maps (format, PackItem index) to the source texture so we can read tile - // data when streaming in. Populated during CreateStreamingAtlasArray. - // (SourceTextureInfo is declared in the public section above.) - // Key: format -> vector of source textures (matches atlas packing order) - std::unordered_map> m_SourceTextures; - - // ========================================================================= - // Global Source Offsets (for feedback texture indexing) - // ========================================================================= - // Maps each format to its cumulative offset in the flat global source array. - // Computed during CreateStreamingAtlasArray. 
- std::unordered_map m_GlobalSourceOffsets; - UINT m_TotalSourceCount = 0; - - // ========================================================================= - // Staging Ring Buffer - // ========================================================================= - static constexpr UINT STAGING_RING_SIZE = 8; - struct StagingBuffer { - Microsoft::WRL::ComPtr texture; - bool inUse = false; - }; - std::array m_StagingRing{}; - UINT m_StagingRingHead = 0; - - // ========================================================================= - // Budget & Tuning Constants - // ========================================================================= - static constexpr UINT TILE_SIZE_BYTES = 65536; // 64KB per tile - static constexpr UINT MAX_TILE_UPLOADS_PER_FRAME = 8; // tiles uploaded per frame in UpdateStreaming - static constexpr UINT MAX_TILE_UNMAPS_PER_FRAME = 4; - static constexpr float UNLOAD_COOLDOWN_SECONDS = 5.0f; - static constexpr UINT INITIAL_POOL_TILES = 512; // 32 MB per pool - static constexpr UINT PRELOADED_COARSE_MIPS = 1; // preload ALL standard mips at creation - - // ========================================================================= - // Internal Helpers - // ========================================================================= - - // Tile pool management - UINT AllocateTile( DXGI_FORMAT fmt ); - void FreeTile( DXGI_FORMAT fmt, UINT tileIndex ); - bool CreateTilePool( DXGI_FORMAT fmt, UINT numTiles ); - void GrowTilePool( DXGI_FORMAT fmt, UINT additionalTiles ); - - // Default tile - void InitDefaultTile( DXGI_FORMAT fmt ); - void FillDefaultTileData( DXGI_FORMAT fmt, std::vector& outData ); - - // Tile mapping - void MapTileToDefault( const TiledAtlas& atlas, UINT subresource, - UINT tileX, UINT tileY, DXGI_FORMAT fmt ); - void MapTileToPool( const TiledAtlas& atlas, UINT subresource, - UINT tileX, UINT tileY, UINT poolTileIndex, - DXGI_FORMAT fmt ); - void MapAllTilesToDefault( const TiledAtlas& atlas, DXGI_FORMAT fmt ); - - // Upload one 
source texture's mip region into the tiled atlas. - // All tiles covering the source region must be mapped to pool tiles before calling. - // Uses a bounded D3D11_BOX so non-uniform texture sizes are handled correctly. - void UploadTileData( const TiledAtlas& atlas, UINT subresource, - const SourceTextureInfo& src, UINT srcMip ); - - // Generate missing mip levels for a source texture that has fewer mips than - // the atlas requires. Uses DirectXTex to capture the source's last mip, - // generate a proper box-filtered mip chain, re-compress to the atlas format, - // and upload each generated level. Tiles are mapped before upload. - void GenerateMissingMips( const TiledAtlas& atlas, const SourceTextureInfo& src, - DXGI_FORMAT fmt, UINT srcIndex, - const std::vector& tilings, - UINT numSubresourceTilings, - const D3D11_TILE_SHAPE& tileShape ); - - // Key generation - static uint64_t MakeTileKey( DXGI_FORMAT fmt, UINT subresource, - UINT tileX, UINT tileY ); - static uint64_t MakeSourceKey( DXGI_FORMAT fmt, UINT sourceIndex, UINT mip ); - - // Preload coarsest mip levels (called from CreateStreamingAtlasArray) - void PreloadCoarseMips( DXGI_FORMAT fmt ); - - // Get number of tiles in a subresource dimension - void GetSubresourceTileCount( const TiledAtlas& atlas, UINT mipLevel, - UINT& tilesX, UINT& tilesY ) const; -}; From 5073b349055f2857bfed67920e83ae8d054b966d Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sat, 21 Mar 2026 20:57:38 +0100 Subject: [PATCH 31/42] atlas: use `Engine::GAPI->GetStaticMeshVisuals()` to ensure we collect all possible textures --- D3D11Engine/D3D11VobAtlasPass.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/D3D11Engine/D3D11VobAtlasPass.cpp b/D3D11Engine/D3D11VobAtlasPass.cpp index a5e6fbea..f13f544d 100644 --- a/D3D11Engine/D3D11VobAtlasPass.cpp +++ b/D3D11Engine/D3D11VobAtlasPass.cpp @@ -75,14 +75,14 @@ void D3D11VobAtlasPass::BuildTextureAtlasses() { 
std::unordered_set seenTextures; std::vector uniqueTextures; - for ( auto vobInfo : m_Engine->m_StaticVobs ) { - for ( auto& byTex : reinterpret_cast(vobInfo->VisualInfo)->MeshesByTexture ) { - zCTexture* tex = byTex.first.Material->GetTexture(); + for ( auto [_, vobInfo] : Engine::GAPI->GetStaticMeshVisuals() ) { + for ( auto& byTex : vobInfo->MeshesByTexture ) { + zCTexture* tex = byTex.first.Texture; if ( !tex ) { - auto vis = reinterpret_cast(vobInfo->VisualInfo)->Visual; + auto vis = vobInfo->Visual; LogError() - << "Texture not found for visual " << vobInfo->VisualInfo->VisualName + << "Texture not found for visual " << vobInfo->VisualName << " Visual Type: " << vis->GetVisualType(); continue; @@ -112,10 +112,11 @@ void D3D11VobAtlasPass::BuildTextureAtlasses() { D3D11_TEXTURE2D_DESC desc; engineTex->GetTextureObject()->GetDesc( &desc ); - if ( desc.Format < 1 || desc.Format >= TEXTURE_ATLAS_MAX ) { + if ( desc.Format < 0 || desc.Format >= TEXTURE_ATLAS_MAX ) { LogError() << "Texture " << tex->GetName() << " has unsupported format for atlas: " << desc.Format; continue; } + LogInfo() << "Texture for atlas: " << tex->GetName() << " Format: " << desc.Format; uniqueTextures.push_back( { tex, desc.Format, engineTex->GetTextureObject() } ); } } @@ -186,8 +187,10 @@ void D3D11VobAtlasPass::BuildGeometryBuffers() { for ( auto const& [proto, visual] : Engine::GAPI->GetStaticMeshVisuals() ) { for ( auto const& [meshKey, meshList] : visual->MeshesByTexture ) { auto it = m_TextureAtlasLookup.find( meshKey.Texture ); - if ( it == m_TextureAtlasLookup.end() ) + if ( it == m_TextureAtlasLookup.end() ) { + LogWarn() << "Texture for mesh not found: " << (meshKey.Texture ? 
meshKey.Texture->GetName() : "unknown"); continue; + } const TextureAtlasLookup& lookup = it->second; auto& group = groupsByFormat[lookup.atlasFormat]; From 6aef15a5e67d1c0fec594b93c058a5a522d6db10 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sat, 21 Mar 2026 20:57:54 +0100 Subject: [PATCH 32/42] static vobs: also cache all Indoor-vobs --- D3D11Engine/D3D11GraphicsEngine.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 576d8852..23265aed 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -8086,9 +8086,9 @@ void D3D11GraphicsEngine::CacheWorldStaticVobs() { ctx.frustum = Frustum::AlwaysContainingFrustum(); ctx.drawDistances.OutdoorVobs = 1'000'000; ctx.drawDistances.OutdoorVobsSmall = ctx.drawDistances.OutdoorVobs; - ctx.drawDistances.IndoorVobs = 0; - ctx.drawDistances.VisualFX = 0; - Engine::GAPI->CollectVisibleVobs( ctx, (EBspTreeCollectFlags)(EBspTreeCollectFlags::COLLECT_VOBS | EBspTreeCollectFlags::COLLECT_DISABLE_CHECK_DIST) ); + ctx.drawDistances.IndoorVobs = ctx.drawDistances.OutdoorVobs; + ctx.drawDistances.VisualFX = 0; + Engine::GAPI->CollectVisibleVobs( ctx, (EBspTreeCollectFlags)(COLLECT_VOBS | COLLECT_INDOOR_VOBS | COLLECT_DISABLE_CHECK_DIST) ); const size_t totalItems = m_StaticVobs.size(); // Correct math to calculate exact number of batches (rounds up to nearest multiple of 8, AVX ;) ) From 39de8350956bbaee53a5c87222922a75fb618f56 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Wed, 25 Mar 2026 17:58:50 +0100 Subject: [PATCH 33/42] rebase fix --- D3D11Engine/GothicAPI.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/D3D11Engine/GothicAPI.cpp b/D3D11Engine/GothicAPI.cpp index 7fc3c70b..8cd4e266 100644 --- a/D3D11Engine/GothicAPI.cpp +++ b/D3D11Engine/GothicAPI.cpp @@ -4455,7 +4455,7 @@ void 
GothicAPI::ResetVobFrameStats( ) { for ( auto&& it : VobMap ) { it.second->VisibleInRenderPass = false; } - for ( auto&& it : VobLightMap ) { + for ( auto&& it : VobLights_Sorted ) { it.second->VisibleInRenderPass = false; it.second->VisibleInFrame = false; } From 164c4dc8459c1a1ebcc592f78dfe37ab6f3365eb Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Wed, 25 Mar 2026 20:28:09 +0100 Subject: [PATCH 34/42] re-enable static vob atlas --- D3D11Engine/D3D11GraphicsEngine.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 23265aed..e8c84947 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -605,17 +605,15 @@ XRESULT D3D11GraphicsEngine::Init() { if (maxFeatureLevel >= D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0) { // check amount of GPU Memory available constexpr uint64_t GiB = 1024ull * 1024ull * 1024ull; - if ( adpDesc.DedicatedVideoMemory >= 3 * GiB ) { // on 32 bit processes dx11 can't see more than 3GiB + if ( adpDesc.DedicatedVideoMemory >= 3 * GiB ) { // currently we just assume everything fits into memory. // in the future we should make use of Tiled Resources, which would allow us // to support more memory intensive features, even on less than 4GB cards, by streaming in the necessary tiles. + // but that's very expensive on the CPU and requires deferred loading of textures, and a lot of management. SupportTextureAtlases = true; Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows = SupportTextureAtlases; Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh = SupportTextureAtlases; - - // VOB atlas is currently bugged, due to some vobs not getting their correct textures, - // likely due to being "animated" and at world load no animation has happened yet. 
- Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs = false; + Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs = SupportTextureAtlases; } } From d624b83d96c1e507ef184a339928837260cbc129 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Tue, 7 Apr 2026 12:00:44 +0200 Subject: [PATCH 35/42] rebase fix --- D3D11Engine/D3D11GraphicsEngine.cpp | 7 ++----- D3D11Engine/D3D11ShaderManager.cpp | 24 +++++------------------- D3D11Engine/GothicAPI.cpp | 7 +++++-- 3 files changed, 12 insertions(+), 26 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index e8c84947..3883807a 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -5580,9 +5580,6 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p if ( blendAdd || blendBlend ) { continue; } - } - - for ( unsigned int i = 0; i < mlist.size(); i++ ) { for ( unsigned int i = 0; i < mlist.size(); i++ ) { // Bind texture @@ -5604,7 +5601,7 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p MeshInfo* mi = mlist[i]; // Draw batch - DrawInstanced( mi->MeshVertexBuffer, mi->MeshIndexBuffer, + DrawInstanced( mi->MeshVertexBuffer.get(), mi->MeshIndexBuffer.get(), mi->Indices.size(), DynamicInstancingBuffer.get(), sizeof( VobInstanceInfo ), b.Instances.size(), sizeof( ExVertexStruct ), b.StartInstanceNum ); @@ -5744,7 +5741,7 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p MeshInfo* mi = mlist[i]; - DrawInstanced( mi->MeshVertexBuffer, mi->MeshIndexBuffer, + DrawInstanced( mi->MeshVertexBuffer.get(), mi->MeshIndexBuffer.get(), mi->Indices.size(), DynamicInstancingBuffer.get(), sizeof( VobInstanceInfo ), batch.Instances.size(), sizeof( ExVertexStruct ), batch.StartInstanceNum ); diff --git 
a/D3D11Engine/D3D11ShaderManager.cpp b/D3D11Engine/D3D11ShaderManager.cpp index 7eff6017..f96df191 100644 --- a/D3D11Engine/D3D11ShaderManager.cpp +++ b/D3D11Engine/D3D11ShaderManager.cpp @@ -246,16 +246,14 @@ XRESULT D3D11ShaderManager::Init() { .with_layout( 10 ) ); Shaders.push_back( ShaderInfo::make("VS_ExInstancedObjIndirectAtlas.hlsl" ) - .with_layout( 12 ) - .with_cbuffer( sizeof( VS_ExConstantBuffer_PerFrame ) ) - .with_cbuffer( sizeof( VS_ExConstantBuffer_Wind ) ) ); + .with_layout( 12 ) ); Shaders.push_back( ShaderInfo::make( "VS_ExInstanced.hlsl" ) .with_layout( 4 ) ); // World mesh atlas vertex shader (uses same layout 12: ExVertexStruct + uint instance remap) Shaders.push_back( ShaderInfo::make( "VS_ExWorldAtlas.hlsl" ) - .with_layout( 12 ); + .with_layout( 12 ) ); Shaders.push_back( ShaderInfo::make( "VS_GrassInstanced.hlsl" ) .with_layout( 9 ) ); @@ -370,11 +368,7 @@ XRESULT D3D11ShaderManager::Init() { Shaders.push_back( ShaderInfo::make( "PS_WaterfallFoam.hlsl" ) ); //foam on at the base of waterfalls Shaders.push_back( ShaderInfo::make( "PS_DiffuseAtlas.hlsl") - .with_macros( makros ) - .with_cbuffer( sizeof( GothicGraphicsState ) ) - .with_cbuffer( sizeof( AtmosphereConstantBuffer ) ) - .with_cbuffer( sizeof( MaterialInfo::Buffer ) ) - .with_cbuffer( sizeof( float4 ) ) ); // DIST_Distance + .with_macros( makros ) ); // DIST_Distance makros.clear(); m.Name = "NORMALMAPPING"; @@ -385,19 +379,11 @@ XRESULT D3D11ShaderManager::Init() { makros.push_back( m ); Shaders.push_back( ShaderInfo::make( "PS_DiffuseAtlas.hlsl" ) - .with_macros( makros ) - .with_cbuffer( sizeof( GothicGraphicsState ) ) - .with_cbuffer( sizeof( AtmosphereConstantBuffer ) ) - .with_cbuffer( sizeof( MaterialInfo::Buffer ) ) - .with_cbuffer( sizeof( float4 ) ) ); // DIST_Distance + .with_macros( makros ) ); // DIST_Distance // World mesh atlas PS — flags-driven normal/FX/alpha-test in a single shader makros.clear(); - Shaders.push_back( ShaderInfo::make( "PS_WorldAtlas.hlsl" ) - 
.with_cbuffer( sizeof( GothicGraphicsState ) ) - .with_cbuffer( sizeof( AtmosphereConstantBuffer ) ) - .with_cbuffer( sizeof( MaterialInfo::Buffer ) ) - .with_cbuffer( sizeof( float4 ) ) ); // DIST_Distance + Shaders.push_back( ShaderInfo::make( "PS_WorldAtlas.hlsl" ) ); // DIST_Distance makros.clear(); diff --git a/D3D11Engine/GothicAPI.cpp b/D3D11Engine/GothicAPI.cpp index 8cd4e266..06dad5dc 100644 --- a/D3D11Engine/GothicAPI.cpp +++ b/D3D11Engine/GothicAPI.cpp @@ -5942,8 +5942,11 @@ static void CollectVisibleVobsHelperNonRecursive( BspInfo* base, vi->UpdateShadows = !PFXVobLight; vit->second = vi; - if ( !vi->IsPFXVobLight && RendererState.RendererSettings.EnablePointlightShadows >= GothicRendererSettings::PLS_STATIC_ONLY ) - Engine::GraphicsEngine->CreateShadowedPointLight( &vi->LightShadowBuffers, vi, true ); + if ( !vi->IsPFXVobLight && RendererState.RendererSettings.EnablePointlightShadows >= GothicRendererSettings::PLS_STATIC_ONLY ) { + BaseShadowedPointLight* pl; + Engine::GraphicsEngine->CreateShadowedPointLight( &pl, vi, true ); + vi->LightShadowBuffers.reset( pl ); + } } VobLightInfo* vi = vit->second; From 5c55707e79e93b7c8e0e7c3e76fa1ce8b4e916cf Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Tue, 7 Apr 2026 12:17:34 +0200 Subject: [PATCH 36/42] adjust ConstantBuffer usages --- D3D11Engine/D3D11GraphicsEngine.cpp | 23 ++++++++--------- D3D11Engine/D3D11MeshAtlasPass.cpp | 10 +++----- D3D11Engine/D3D11VobAtlasPass.cpp | 39 ++++++++++++++++------------- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 3883807a..2a4e67ac 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -5541,8 +5541,9 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p Context->PSSetShader( nullptr, nullptr, 0 ); } + GraphicsShaderConstantBuffer windParamsCB = {}; if ( ActiveVS 
) { - ActiveVS->GetConstantBuffer()[1]->BindToVertexShader( 1 ); + windParamsCB = ActiveVS->GetBuffer( "WindParams" ).Bind(); } XMFLOAT3 vPlayerPosition = Engine::GAPI->GetPlayerVob() ? Engine::GAPI->GetPlayerVob()->GetPositionWorld() : XMFLOAT3( 0, 0, 0 ); @@ -5557,7 +5558,7 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p g_windBuffer.maxHeight = staticMeshVisual->BBox.Max.y; if ( ActiveVS ) { - ActiveVS->GetConstantBuffer()[1]->UpdateBuffer( &g_windBuffer ); + windParamsCB.Update( &g_windBuffer ); } zCTexture* previousTx = nullptr; @@ -5680,20 +5681,17 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p } // Rebind PS constant buffers (GPU indirect path may have overwritten them) - ActivePS->GetConstantBuffer()[0]->UpdateBuffer( - &Engine::GAPI->GetRendererState().GraphicsState ); - ActivePS->GetConstantBuffer()[0]->BindToPixelShader( 0 ); + ActivePS->GetBuffer(0).Update(&Engine::GAPI->GetRendererState().GraphicsState ).Bind(); GSky* dynSky = Engine::GAPI->GetSky(); - ActivePS->GetConstantBuffer()[1]->UpdateBuffer( &dynSky->GetAtmosphereCB() ); - ActivePS->GetConstantBuffer()[1]->BindToPixelShader( 1 ); - + ActivePS->GetBuffer(1).Update(&dynSky->GetAtmosphereCB() ).Bind(); InfiniteRangeConstantBuffer->BindToPixelShader( 3 ); SetupVS_ExConstantBuffer(); + GraphicsShaderConstantBuffer windParamsCB = {}; if ( ActiveVS ) { - ActiveVS->GetConstantBuffer()[1]->BindToVertexShader( 1 ); + windParamsCB = ActiveVS->GetBuffer(1).Bind(); } XMFLOAT3 vPlayerPosition = Engine::GAPI->GetPlayerVob() ? 
Engine::GAPI->GetPlayerVob()->GetPositionWorld() : XMFLOAT3( 0, 0, 0 ); @@ -5706,7 +5704,7 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p g_windBuffer.maxHeight = batch.VisualInfo->BBox.Max.y; if ( ActiveVS ) { - ActiveVS->GetConstantBuffer()[1]->UpdateBuffer( &g_windBuffer ); + windParamsCB.Update( &g_windBuffer ); } zCTexture* previousTx = nullptr; @@ -8005,6 +8003,8 @@ void D3D11GraphicsEngine::BuildHiZPyramid() { hiZCS->Apply(); + auto hiZCb = hiZCS->GetBuffer(0).Bind(); + for ( UINT mip = 0; mip < m_HiZMipCount; mip++ ) { UINT mipWidth = (std::max)( width >> mip, 1u ); UINT mipHeight = (std::max)( height >> mip, 1u ); @@ -8015,8 +8015,7 @@ void D3D11GraphicsEngine::BuildHiZPyramid() { cb.outputHeight = mipHeight; cb.inputMipLevel = ( mip > 0 ) ? ( mip - 1 ) : 0; cb.isCopyPass = ( mip == 0 ) ? 1 : 0; - hiZCS->GetConstantBuffer()[0]->UpdateBuffer( &cb ); - hiZCS->GetConstantBuffer()[0]->BindToComputeShader( 0 ); + hiZCb.Update( &cb ); // Bind input SRV: // Mip 0: read from depth buffer copy (avoids DSV/SRV hazard) diff --git a/D3D11Engine/D3D11MeshAtlasPass.cpp b/D3D11Engine/D3D11MeshAtlasPass.cpp index cbe88563..460234b9 100644 --- a/D3D11Engine/D3D11MeshAtlasPass.cpp +++ b/D3D11Engine/D3D11MeshAtlasPass.cpp @@ -443,17 +443,13 @@ XRESULT D3D11MeshAtlasPass::Draw() { // Pixel shader m_Engine->SetActivePixelShader( PShaderID::PS_WorldAtlas ); - m_Engine->ActivePS->GetConstantBuffer()[0]->UpdateBuffer( - &Engine::GAPI->GetRendererState().GraphicsState ); - m_Engine->ActivePS->GetConstantBuffer()[0]->BindToPixelShader( 0 ); + m_Engine->ActivePS->GetBuffer(0).Update(&Engine::GAPI->GetRendererState().GraphicsState ).Bind(); GSky* sky = Engine::GAPI->GetSky(); - m_Engine->ActivePS->GetConstantBuffer()[1]->UpdateBuffer( &sky->GetAtmosphereCB() ); - m_Engine->ActivePS->GetConstantBuffer()[1]->BindToPixelShader( 1 ); + m_Engine->ActivePS->GetBuffer(1).Update( &sky->GetAtmosphereCB() ).Bind(); MaterialInfo defMaterial{}; - 
m_Engine->ActivePS->GetConstantBuffer()[2]->UpdateBuffer( &defMaterial.buffer ); - m_Engine->ActivePS->GetConstantBuffer()[2]->BindToPixelShader( 2 ); + m_Engine->ActivePS->GetBuffer(2).Update( &defMaterial.buffer ).Bind(); m_Engine->InfiniteRangeConstantBuffer->BindToPixelShader( 3 ); diff --git a/D3D11Engine/D3D11VobAtlasPass.cpp b/D3D11Engine/D3D11VobAtlasPass.cpp index f13f544d..d7e2b0b0 100644 --- a/D3D11Engine/D3D11VobAtlasPass.cpp +++ b/D3D11Engine/D3D11VobAtlasPass.cpp @@ -561,8 +561,7 @@ XRESULT D3D11VobAtlasPass::Draw( const Frustum& frustum, bool bindPS ) { VS_ExConstantBuffer_Wind windBuff{}; m_Engine->ApplyWindProps( windBuff ); - m_Engine->ActiveVS->GetConstantBuffer()[1]->UpdateBuffer( &windBuff ); - m_Engine->ActiveVS->GetConstantBuffer()[1]->BindToVertexShader( 1 ); + m_Engine->ActiveVS->GetBuffer(1).Update( &windBuff ).Bind(); if ( bindPS ) context->PSSetShaderResources( 4, 1, m_Engine->ReflectionCube.GetAddressOf() ); @@ -573,6 +572,10 @@ XRESULT D3D11VobAtlasPass::Draw( const Frustum& frustum, bool bindPS ) { MaterialInfo defMaterial{}; GSky* sky = Engine::GAPI->GetSky(); + m_Engine->SetActivePixelShader( PShaderID::PS_DiffuseAtlas ); + auto lastPs = PShaderID::COUNT; + GraphicsShaderConstantBuffer buffersToBind[3] = {}; + for ( auto& group : m_AtlasDrawGroups ) { ID3D11ShaderResourceView* srv = m_TextureAtlasses[group.format].atlasSRV; if ( !srv ) @@ -583,26 +586,28 @@ XRESULT D3D11VobAtlasPass::Draw( const Frustum& frustum, bool bindPS ) { if ( needsPS ) { context->PSSetShaderResources( 0, 1, &srv ); - if ( bindPS && group.format != DXGI_FORMAT_BC2_UNORM ) - m_Engine->SetActivePixelShader( PShaderID::PS_DiffuseAtlas ); - else - m_Engine->SetActivePixelShader( PShaderID::PS_DiffuseAtlasAlphaTest ); - - m_Engine->ActivePS->GetConstantBuffer()[0]->UpdateBuffer( - &Engine::GAPI->GetRendererState().GraphicsState ); - m_Engine->ActivePS->GetConstantBuffer()[0]->BindToPixelShader( 0 ); + auto newPs = (bindPS && group.format != DXGI_FORMAT_BC2_UNORM) 
+ ? PShaderID::PS_DiffuseAtlas + : PShaderID::PS_DiffuseAtlasAlphaTest; - m_Engine->ActivePS->GetConstantBuffer()[1]->UpdateBuffer( &sky->GetAtmosphereCB() ); - m_Engine->ActivePS->GetConstantBuffer()[1]->BindToPixelShader( 1 ); + if ( newPs != lastPs ) { + m_Engine->SetActivePixelShader( newPs ); + m_Engine->ActivePS->Apply(); - m_Engine->ActivePS->GetConstantBuffer()[2]->UpdateBuffer( &defMaterial.buffer ); - m_Engine->ActivePS->GetConstantBuffer()[2]->BindToPixelShader( 2 ); + buffersToBind[0] = m_Engine->ActivePS->GetBuffer( 0 ).Bind(); + buffersToBind[1] = m_Engine->ActivePS->GetBuffer( 1 ).Bind(); + buffersToBind[2] = m_Engine->ActivePS->GetBuffer( 2 ).Bind(); + m_Engine->OutdoorVobsConstantBuffer->BindToPixelShader( 3 ); - m_Engine->OutdoorVobsConstantBuffer->BindToPixelShader( 3 ); + lastPs = newPs; + } - m_Engine->ActivePS->Apply(); - } else { + buffersToBind[0].Update( &Engine::GAPI->GetRendererState().GraphicsState ); + buffersToBind[1].Update( &sky->GetAtmosphereCB() ); + buffersToBind[2].Update( &defMaterial.buffer ); + } else if ( lastPs != PShaderID::COUNT ) { context->PSSetShader( nullptr, nullptr, 0 ); + lastPs = PShaderID::COUNT; } DrawMultiIndexedInstancedIndirect( From 5ff3331f809922c2aa3321451e213101a57b7474 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Tue, 7 Apr 2026 12:44:33 +0200 Subject: [PATCH 37/42] fix wind when drawing vobs using atlas approach --- D3D11Engine/ConstantBufferStructs.h | 6 +++++- D3D11Engine/D3D11VobAtlasPass.cpp | 3 +++ D3D11Engine/Shaders/CS_CullVobs.hlsl | 8 +++++++- .../VS_ExInstancedObjIndirectAtlas.hlsl | 19 +++++++++++-------- 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/D3D11Engine/ConstantBufferStructs.h b/D3D11Engine/ConstantBufferStructs.h index 0bf452fa..e5c5c282 100644 --- a/D3D11Engine/ConstantBufferStructs.h +++ b/D3D11Engine/ConstantBufferStructs.h @@ -29,6 +29,8 @@ struct VobInstanceInfoAtlas { float uEnd; float vEnd; UINT globalSourceIndex; 
// global source index into feedback texture + float minHeight; // BBox.Min.y for per-vob wind calculations + float maxHeight; // BBox.Max.y for per-vob wind calculations }; // Descriptor returned for use with shader @@ -61,7 +63,9 @@ struct VobGPUData { float canBeAffectedByPlayer; UINT submeshStart; // index into SubmeshGPUData[] UINT submeshCount; // how many submeshes this vob maps to - UINT pad2[3]; + float minHeight; // BBox.Min.y for per-vob wind calculations + float maxHeight; // BBox.Max.y for per-vob wind calculations + UINT pad2; }; // Per-submesh lookup, shared across all vobs with the same visual diff --git a/D3D11Engine/D3D11VobAtlasPass.cpp b/D3D11Engine/D3D11VobAtlasPass.cpp index d7e2b0b0..99098552 100644 --- a/D3D11Engine/D3D11VobAtlasPass.cpp +++ b/D3D11Engine/D3D11VobAtlasPass.cpp @@ -385,6 +385,9 @@ void D3D11VobAtlasPass::BuildGPUCullingBuffers() { data.canBeAffectedByPlayer = 0.0f; } + data.minHeight = visual->BBox.Min.y; + data.maxHeight = visual->BBox.Max.y; + auto it = visualSubmeshRanges.find( visual ); if ( it != visualSubmeshRanges.end() ) { data.submeshStart = it->second.start; diff --git a/D3D11Engine/Shaders/CS_CullVobs.hlsl b/D3D11Engine/Shaders/CS_CullVobs.hlsl index f335313d..7b78232b 100644 --- a/D3D11Engine/Shaders/CS_CullVobs.hlsl +++ b/D3D11Engine/Shaders/CS_CullVobs.hlsl @@ -34,7 +34,9 @@ struct VobGPUData float canBeAffectedByPlayer; uint submeshStart; uint submeshCount; - uint pad2[3]; + float minHeight; + float maxHeight; + uint pad2; }; struct SubmeshGPUData @@ -62,6 +64,8 @@ struct VobInstanceInfoAtlas float uEnd; float vEnd; uint globalSourceIndex; + float minHeight; + float maxHeight; }; StructuredBuffer VobBuffer : register( t0 ); @@ -237,6 +241,8 @@ void CSMain( uint3 DTid : SV_DispatchThreadID ) inst.uEnd = sm.uEnd; inst.vEnd = sm.vEnd; inst.globalSourceIndex = sm.globalSourceIndex; + inst.minHeight = vob.minHeight; + inst.maxHeight = vob.maxHeight; InstanceOutput[sm.instanceBaseOffset + slot] = inst; diff --git 
a/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl b/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl index bd4349ef..1623642a 100644 --- a/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl +++ b/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl @@ -33,6 +33,8 @@ struct VobInstanceInfoAtlas { float uEnd; float vEnd; uint globalSourceIndex; + float minHeight; + float maxHeight; }; StructuredBuffer instances : register(t1); @@ -85,17 +87,17 @@ static const float phaseVariation = 0.40f; static const float windStrengMult = 16.0f; static const float PI_2 = 6.283185; -float GetInstancePhaseOffset(float4x4 objMatrix) +float GetInstancePhaseOffset(float4x4 objMatrix, float maxH) { - float seed = dot(objMatrix._11_22_33, float3(12.9898, 78.233, 53.539)) + maxHeight; + float seed = dot(objMatrix._11_22_33, float3(12.9898, 78.233, 53.539)) + maxH; return frac(sin(seed) * 43758.5453) * phaseVariation; } -float3 ApplyTreeWind(float3 vertexPos, float3 direction, float heightNorm, float timeSec, float4x4 instMatrix, float windStrength) +float3 ApplyTreeWind(float3 vertexPos, float3 direction, float heightNorm, float timeSec, float4x4 instMatrix, float windStrength, float maxH) { float shouldAffect = saturate(sign(heightNorm - trunkStiffness + 0.0001f)); - float instancePhase = GetInstancePhaseOffset(instMatrix) * PI_2; + float instancePhase = GetInstancePhaseOffset(instMatrix, maxH) * PI_2; float adjustedHeight = saturate((heightNorm - trunkStiffness) / (1.0 - trunkStiffness)) * shouldAffect; float heightFactor = pow(adjustedHeight, 2.6f); @@ -167,7 +169,7 @@ VS_OUTPUT VSMain( VS_INPUT Input ) if (inst.canBeAffectedByPlayer > 0) { - position += CalculatePlayerInfluence(playerPos, position, minHeight, maxHeight, inst.world); + position += CalculatePlayerInfluence(playerPos, position, inst.minHeight, inst.maxHeight, inst.world); } #endif @@ -175,8 +177,8 @@ VS_OUTPUT VSMain( VS_INPUT Input ) if (inst.windStrength > 0) { - float heightRange = max(maxHeight 
- minHeight, 0.001); - float vertexHeightNorm = saturate((Input.vPosition.y - minHeight) / heightRange); + float heightRange = max(inst.maxHeight - inst.minHeight, 0.001); + float vertexHeightNorm = saturate((Input.vPosition.y - inst.minHeight) / heightRange); position += ApplyTreeWind( Input.vPosition, @@ -184,7 +186,8 @@ VS_OUTPUT VSMain( VS_INPUT Input ) vertexHeightNorm, globalTime, inst.world, - inst.windStrength + inst.windStrength, + inst.maxHeight ); } #endif From d96d48882b7eefd2492b7f48e3705573b7334f6a Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Tue, 7 Apr 2026 12:44:43 +0200 Subject: [PATCH 38/42] Fix Atlas shadow shader --- D3D11Engine/D3D11GraphicsEngine.cpp | 9 +++++++- D3D11Engine/D3D11ShaderManager.cpp | 11 ++++++++++ D3D11Engine/ShaderIDs.h | 1 + D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl | 28 ++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 1 deletion(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 2a4e67ac..b4409119 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -5346,7 +5346,7 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p Engine::GAPI->SetViewTransformXM( view ); // Set shader - SetActivePixelShader( PShaderID::PS_DiffuseAlphaTestShadows ); + SetActivePixelShader( PShaderID::PS_DiffuseAlphaTest ); auto defaultPS = ActivePS; SetActiveVertexShader( VShaderID::VS_Ex ); @@ -5421,10 +5421,17 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p bool drawStaticVobs = true; if ( Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows && m_VobAtlasPass->IsReady() ) { + + SetActivePixelShader( PShaderID::PS_DiffuseAtlasAlphaTestShadows ); + defaultPS = ActivePS; + // GPU indirect path: reuse the VOB atlas pass with the cascade/shadow frustum. // BC1 groups render depth-only (no PS); BC2 groups use the alpha-test PS. 
m_VobAtlasPass->Draw( currentFrustum, /*bindPS=*/false ); drawStaticVobs = false; + + SetActivePixelShader( PShaderID::PS_DiffuseAlphaTestShadows ); + defaultPS = ActivePS; } static std::vector dynamicVobCasters; diff --git a/D3D11Engine/D3D11ShaderManager.cpp b/D3D11Engine/D3D11ShaderManager.cpp index f96df191..3876e704 100644 --- a/D3D11Engine/D3D11ShaderManager.cpp +++ b/D3D11Engine/D3D11ShaderManager.cpp @@ -380,6 +380,17 @@ XRESULT D3D11ShaderManager::Init() { Shaders.push_back( ShaderInfo::make( "PS_DiffuseAtlas.hlsl" ) .with_macros( makros ) ); // DIST_Distance + + makros.clear(); + m.Name = "NORMALMAPPING"; + m.Definition = "0"; + makros.push_back( m ); + m.Name = "ALPHATEST_SHADOWS"; + m.Definition = "1"; + makros.push_back( m ); + + Shaders.push_back( ShaderInfo::make( "PS_DiffuseAtlas.hlsl" ) + .with_macros( makros ) ); // DIST_Distance // World mesh atlas PS — flags-driven normal/FX/alpha-test in a single shader makros.clear(); diff --git a/D3D11Engine/ShaderIDs.h b/D3D11Engine/ShaderIDs.h index dd3fe00e..082f04aa 100644 --- a/D3D11Engine/ShaderIDs.h +++ b/D3D11Engine/ShaderIDs.h @@ -82,6 +82,7 @@ enum class PShaderID : size_t { PS_DiffuseNormalmappedAlphaTestFxMap, PS_DiffuseAtlas, PS_DiffuseAtlasAlphaTest, + PS_DiffuseAtlasAlphaTestShadows, PS_Preview_White, PS_Preview_Textured, PS_Preview_TexturedLit, diff --git a/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl b/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl index 8ad84315..98abd01d 100644 --- a/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl +++ b/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl @@ -68,7 +68,35 @@ float2 CalculateVelocity(float4 currClipPos, float4 prevClipPos) //-------------------------------------------------------------------------------------- // Pixel Shader //-------------------------------------------------------------------------------------- +#if ALPHATEST_SHADOWS == 1 +void PSMain( PS_INPUT Input ) +{ + // Per-pixel atlas UV remapping: avoids frac() interpolation collapse in the VS + // 
(frac(1.0)=0.0 in VS causes entire [0,1] UV range to collapse to a single texel). + // SampleGrad uses gradients from the raw (pre-frac) UVs so MIP selection stays correct + // even at UV wrap boundaries where frac() would create huge derivative discontinuities. + float2 rawUV = Input.vTexcoord3D.xy; + float slice = Input.vTexcoord3D.z; + float2 atlasScale = Input.vAtlasRect.zw - Input.vAtlasRect.xy; // (uEnd-uStart, vEnd-vStart) + + // SampleGrad ignores sampler MipLODBias, so we manually apply the LOD bias + // (needed for FSR upscaling to produce sharp textures at lower resolutions) + float biasFactor = exp2(DIST_LodBias); + float2 gradX = ddx(rawUV) * atlasScale * biasFactor; + float2 gradY = ddy(rawUV) * atlasScale * biasFactor; + float2 atlasUV = Input.vAtlasRect.xy + frac(rawUV) * atlasScale; + + float4 color = TX_AtlasArray.SampleGrad(SS_Linear, float3(atlasUV, slice), gradX, gradY); + + ClipDistanceEffect(length(Input.vViewPosition), DIST_DrawDistance, color.r * 2 - 1, 500.0f); + DoAlphaTest(color.a); +} + +// Disable regular shader +DEFERRED_PS_OUTPUT PSMainDISABLED( PS_INPUT Input ) : SV_TARGET +#else DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET +#endif { DEFERRED_PS_OUTPUT output; output.vReactiveMask = 0.0f; From da715fc949b63c34564ca7a1248fdc4f83f2cb8c Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Tue, 7 Apr 2026 14:26:16 +0200 Subject: [PATCH 39/42] optimize `DrawSkeletalMeshVobs` by drawing skeletals Inline and reducing API overhead. 
--- D3D11Engine/D3D11GraphicsEngine.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index b4409119..ed2954e1 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -5624,6 +5624,10 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p // Draw dynamic vobs (spawned at runtime, not part of m_StaticVobs or atlas) if ( !dynamicVobCasters.empty() ) { // Group by visual for instanced drawing + + PShaderID alphaTestShader = (GetRenderingStage() == DES_SHADOWMAP) || (GetRenderingStage() == DES_SHADOWMAP_CUBE) + ? PShaderID::PS_DiffuseAtlasAlphaTestShadows + : PShaderID::PS_DiffuseAtlasAlphaTest; std::vector dynBatches; std::unordered_map batchIndex; @@ -5683,7 +5687,7 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p if ( linearDepth ) { SetActivePixelShader( PShaderID::PS_LinDepth ); } else { - SetActivePixelShader( PShaderID::PS_DiffuseAlphaTest ); + SetActivePixelShader( alphaTestShader ); Context->PSSetShader( nullptr, nullptr, 0 ); } @@ -5704,6 +5708,7 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p XMFLOAT3 vPlayerPosition = Engine::GAPI->GetPlayerVob() ? 
Engine::GAPI->GetPlayerVob()->GetPositionWorld() : XMFLOAT3( 0, 0, 0 ); g_windBuffer.playerPos = float3( vPlayerPosition.x, vPlayerPosition.y, vPlayerPosition.z ); + bool hasPS = false; for ( auto const& batch : dynBatches ) { if ( batch.Instances.empty() ) continue; @@ -5734,13 +5739,17 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p if ( bindTexture ) { if ( alphaRef > 0.0f && tx->CacheIn( 0.6f ) == zRES_CACHED_IN ) { tx->Bind( 0 ); - ActivePS->Apply(); + if (!hasPS) { + ActivePS->Apply(); + hasPS = true; + } previousTx = tx; } else continue; } else { - if ( !linearDepth ) { + if ( !linearDepth && hasPS ) { Context->PSSetShader( nullptr, nullptr, 0 ); + hasPS = false; } } From 69be7e33fae3d47072490908fe95293d10d8fad0 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sat, 18 Apr 2026 20:31:31 +0200 Subject: [PATCH 40/42] fix merge --- D3D11Engine/D3D11GraphicsEngine.cpp | 4 ++-- D3D11Engine/D3D11ShadowMap.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index ed2954e1..3b896536 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -5609,7 +5609,7 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p MeshInfo* mi = mlist[i]; // Draw batch - DrawInstanced( mi->MeshVertexBuffer.get(), mi->MeshIndexBuffer.get(), + DrawInstanced( mi->MeshVertexBuffer, mi->MeshIndexBuffer, mi->Indices.size(), DynamicInstancingBuffer.get(), sizeof( VobInstanceInfo ), b.Instances.size(), sizeof( ExVertexStruct ), b.StartInstanceNum ); @@ -5755,7 +5755,7 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p MeshInfo* mi = mlist[i]; - DrawInstanced( mi->MeshVertexBuffer.get(), mi->MeshIndexBuffer.get(), + DrawInstanced( mi->MeshVertexBuffer, mi->MeshIndexBuffer, mi->Indices.size(), DynamicInstancingBuffer.get(), sizeof( 
VobInstanceInfo ), batch.Instances.size(), sizeof( ExVertexStruct ), batch.StartInstanceNum ); diff --git a/D3D11Engine/D3D11ShadowMap.cpp b/D3D11Engine/D3D11ShadowMap.cpp index dc083471..ee17842e 100644 --- a/D3D11Engine/D3D11ShadowMap.cpp +++ b/D3D11Engine/D3D11ShadowMap.cpp @@ -683,7 +683,7 @@ XRESULT D3D11ShadowMap::DrawPointlightShadows( std::vector& light auto& settings = Engine::GAPI->GetRendererState().RendererSettings; // Release any resources of not visible lights - for ( auto& it : Engine::GAPI->VobLightMap ) { + for ( auto& it : Engine::GAPI->VobLights_Sorted ) { if ( it.second->LightShadowBuffers && (!it.second->Vob->IsEnabled() || !it.second->VisibleInFrame) ) { if ( D3D11PointLight* pl = static_cast(it.second->LightShadowBuffers.get()) ) { From f6b243577dbf0234b87a187d98c28608c8bc4f5c Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sat, 18 Apr 2026 21:11:37 +0200 Subject: [PATCH 41/42] fix bad shader --- D3D11Engine/D3D11VobAtlasPass.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/D3D11Engine/D3D11VobAtlasPass.cpp b/D3D11Engine/D3D11VobAtlasPass.cpp index 99098552..42cd122d 100644 --- a/D3D11Engine/D3D11VobAtlasPass.cpp +++ b/D3D11Engine/D3D11VobAtlasPass.cpp @@ -575,10 +575,14 @@ XRESULT D3D11VobAtlasPass::Draw( const Frustum& frustum, bool bindPS ) { MaterialInfo defMaterial{}; GSky* sky = Engine::GAPI->GetSky(); - m_Engine->SetActivePixelShader( PShaderID::PS_DiffuseAtlas ); + context->PSSetShader( nullptr, nullptr, 0 ); auto lastPs = PShaderID::COUNT; GraphicsShaderConstantBuffer buffersToBind[3] = {}; + const PShaderID alphaTestPS = m_Engine->GetRenderingStage() == D3D11ENGINE_RENDER_STAGE::DES_SHADOWMAP + ? 
PShaderID::PS_DiffuseAtlasAlphaTestShadows + : PShaderID::PS_DiffuseAtlasAlphaTest; + for ( auto& group : m_AtlasDrawGroups ) { ID3D11ShaderResourceView* srv = m_TextureAtlasses[group.format].atlasSRV; if ( !srv ) @@ -591,7 +595,7 @@ XRESULT D3D11VobAtlasPass::Draw( const Frustum& frustum, bool bindPS ) { auto newPs = (bindPS && group.format != DXGI_FORMAT_BC2_UNORM) ? PShaderID::PS_DiffuseAtlas - : PShaderID::PS_DiffuseAtlasAlphaTest; + : alphaTestPS; if ( newPs != lastPs ) { m_Engine->SetActivePixelShader( newPs ); From 0320d94028dedf36c79b5bb4a83c3e3358c044d1 Mon Sep 17 00:00:00 2001 From: kirides <13602143+kirides@users.noreply.github.com> Date: Sat, 18 Apr 2026 21:17:23 +0200 Subject: [PATCH 42/42] Atlas fix bug where removed static visual was still drawn an possibly crashes game --- D3D11Engine/D3D11GraphicsEngine.cpp | 11 +++++++++ D3D11Engine/D3D11VobAtlasPass.cpp | 37 +++++++++++++++++++++++++++++ D3D11Engine/D3D11VobAtlasPass.h | 7 ++++++ D3D11Engine/VobCulling.cpp | 20 +++++++++------- 4 files changed, 67 insertions(+), 8 deletions(-) diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 3b896536..78517c08 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -7564,6 +7564,17 @@ void D3D11GraphicsEngine::DrawFrameParticles( XRESULT D3D11GraphicsEngine::OnVobRemovedFromWorld( zCVob* vob ) { if ( Engine::ImGuiHandle ) Engine::ImGuiHandle->OnVobRemovedFromWorld( vob ); + // Remove from atlas GPU-culling buffer so it stops drawing + if ( m_VobAtlasPass ) m_VobAtlasPass->OnVobRemovedFromWorld( vob ); + + for ( int32_t i = 0; i < m_StaticVobs.size(); ++i ) { + // remove from static vob cache aswell, by moving to the end and resizing the vector, since order doesn't matter in the cache + if ( m_StaticVobs[i] && m_StaticVobs[i]->Vob == vob ) { + m_StaticVobs[i] = nullptr; + break; + } + } + // Take out of shadowupdate queue for ( auto&& it = FrameShadowUpdateLights.begin(); it != 
FrameShadowUpdateLights.end(); ++it ) { if ( (*it)->Vob == vob ) { diff --git a/D3D11Engine/D3D11VobAtlasPass.cpp b/D3D11Engine/D3D11VobAtlasPass.cpp index 42cd122d..faef4d0d 100644 --- a/D3D11Engine/D3D11VobAtlasPass.cpp +++ b/D3D11Engine/D3D11VobAtlasPass.cpp @@ -47,6 +47,8 @@ void D3D11VobAtlasPass::Build() { m_TextureAtlasses[(DXGI_FORMAT)i].Destroy(); m_TextureAtlasLookup.clear(); m_AtlasDrawGroups.clear(); + m_VobGPUDataCPU.clear(); + m_VobToGPUIndex.clear(); if ( !SupportTextureAtlases || !Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs ) { @@ -396,6 +398,14 @@ void D3D11VobAtlasPass::BuildGPUCullingBuffers() { vobGPU.push_back( data ); } + // Keep CPU-side copy and build vob-pointer lookup for runtime removal + m_VobGPUDataCPU = vobGPU; + m_VobToGPUIndex.clear(); + m_VobToGPUIndex.reserve( m_Engine->m_StaticVobs.size() ); + for ( size_t i = 0; i < m_Engine->m_StaticVobs.size(); i++ ) { + m_VobToGPUIndex[m_Engine->m_StaticVobs[i]->Vob] = static_cast(i); + } + // --- 5. 
Upload to GPU --- auto* device = m_Engine->GetDevice().Get(); auto* context = m_Engine->GetContext().Get(); @@ -455,6 +465,33 @@ void D3D11VobAtlasPass::BuildGPUCullingBuffers() { << m_TotalMaxInstances << " max instances"; } +// ============================================================ +// OnVobRemovedFromWorld – hide a removed vob without rebuild +// ============================================================ +void D3D11VobAtlasPass::OnVobRemovedFromWorld( zCVob* vob ) { + auto it = m_VobToGPUIndex.find( vob ); + if ( it == m_VobToGPUIndex.end() ) + return; + + UINT idx = it->second; + m_VobToGPUIndex.erase( it ); + + // Zero submeshCount so the CS never emits instances for this vob + m_VobGPUDataCPU[idx].submeshCount = 0; + + // Upload only the modified element to the GPU via UpdateSubresource + D3D11_BOX + D3D11_BOX box = {}; + box.left = idx * sizeof( VobGPUData ); + box.right = box.left + sizeof( VobGPUData ); + box.top = 0; + box.bottom = 1; + box.front = 0; + box.back = 1; + m_Engine->GetContext()->UpdateSubresource( + m_VobGPUBuffer->GetBuffer(), 0, &box, + &m_VobGPUDataCPU[idx], 0, 0 ); +} + // ============================================================ // Draw – per-frame GPU-cull + indirect draw // ============================================================ diff --git a/D3D11Engine/D3D11VobAtlasPass.h b/D3D11Engine/D3D11VobAtlasPass.h index 5c965261..05a29abe 100644 --- a/D3D11Engine/D3D11VobAtlasPass.h +++ b/D3D11Engine/D3D11VobAtlasPass.h @@ -41,6 +41,9 @@ class D3D11VobAtlasPass { * bindPS=false is used in shadow passes to skip the pixel shader. */ XRESULT Draw( const Frustum& frustum, bool bindPS = true ); + /** Mark a removed vob as invisible in the GPU buffer without rebuilding. */ + void OnVobRemovedFromWorld( class zCVob* vob ); + /** True once Build() has completed and at least one draw group exists. 
*/ bool IsReady() const { return !m_AtlasDrawGroups.empty(); } @@ -75,6 +78,10 @@ class D3D11VobAtlasPass { std::vector m_MergedArgsReset; UINT m_TotalMaxInstances = 0; + // CPU-side cache for targeted per-vob GPU updates on removal + std::vector m_VobGPUDataCPU; + std::unordered_map m_VobToGPUIndex; + void BuildTextureAtlasses(); void BuildGeometryBuffers(); void BuildGPUCullingBuffers(); diff --git a/D3D11Engine/VobCulling.cpp b/D3D11Engine/VobCulling.cpp index db0b0e52..44a6ea59 100644 --- a/D3D11Engine/VobCulling.cpp +++ b/D3D11Engine/VobCulling.cpp @@ -86,10 +86,12 @@ void VobCulling::CullAndGatherStaticVOBs_AVX2( uint32_t instanceIdx = (i * 8) + bitIndex; // Push to dense render queue - outRenderQueue.push_back( { - instanceIdx, - reinterpret_cast(instances[instanceIdx]->VisualInfo), - } ); + if (instances[instanceIdx]) { + outRenderQueue.push_back( { + instanceIdx, + reinterpret_cast(instances[instanceIdx]->VisualInfo), + } ); + } // Clear the lowest set bit so we can find the next one // e.g., 010100 -> 010000 @@ -141,10 +143,12 @@ void VobCulling::CullAndGatherStaticVOBs_DirectXMath( if ( visible ) { uint32_t instanceIdx = static_cast(i * 8 + j); - outRenderQueue.push_back( { - instanceIdx, - reinterpret_cast(instances[instanceIdx]->VisualInfo), - } ); + if (instances[instanceIdx]) { + outRenderQueue.push_back( { + instanceIdx, + reinterpret_cast(instances[instanceIdx]->VisualInfo), + } ); + } } } }