diff --git a/D3D11Engine/BaseGraphicsEngine.h b/D3D11Engine/BaseGraphicsEngine.h index 174c4a39..560e683c 100644 --- a/D3D11Engine/BaseGraphicsEngine.h +++ b/D3D11Engine/BaseGraphicsEngine.h @@ -230,4 +230,6 @@ class BaseGraphicsEngine { virtual XRESULT UpdateRenderStates() { return XR_SUCCESS; }; virtual std::unique_ptr RecordGraphicsEvent( LPCWSTR region ) { return std::make_unique(); } + + virtual void OnWorldLoaded() {}; }; diff --git a/D3D11Engine/ConstantBufferStructs.h b/D3D11Engine/ConstantBufferStructs.h index 4bd715d3..e5c5c282 100644 --- a/D3D11Engine/ConstantBufferStructs.h +++ b/D3D11Engine/ConstantBufferStructs.h @@ -16,17 +16,104 @@ struct VobInstanceInfo { DWORD GP_Slot; }; -/** Remap-index for the static vobs */ -struct VobInstanceRemapInfo { - bool operator < ( const VobInstanceRemapInfo& b ) const { - return InstanceRemapIndex < b.InstanceRemapIndex; - } - - bool operator == ( const VobInstanceRemapInfo& o ) const { - return InstanceRemapIndex == o.InstanceRemapIndex; - } - - DWORD InstanceRemapIndex; +struct VobInstanceInfoAtlas { + XMFLOAT4X4 world; + XMFLOAT4X4 prevWorld; // Previous frame's world matrix for motion vectors + DWORD color; + float windStrenth; + float canBeAffectedByPlayer; + // Texture Atlas information, directly stored in the instance data for easy access in shader without needing an extra StructuredBuffer + int slice; + float uStart; + float vStart; + float uEnd; + float vEnd; + UINT globalSourceIndex; // global source index into feedback texture + float minHeight; // BBox.Min.y for per-vob wind calculations + float maxHeight; // BBox.Max.y for per-vob wind calculations +}; + +// Descriptor returned for use with shader +// Points to a specific slice in the Texture2DArray atlas, along with UV coordinates for sampling that slice +// this is pointed to from VobInstanceInfo GP_Slot into a StructuredBuffer, which is then indexed in the shader to get the correct slice/UVs for each instance +struct TextureDescriptor { + int slice; + 
float uStart; + float vStart; + float uEnd; + float vEnd; +}; + +// CPU-side lookup: maps a zCTexture* to its atlas placement +struct TextureAtlasLookup { + DXGI_FORMAT atlasFormat; + TextureDescriptor descriptor; +}; + +// Per-vob data uploaded once at world load, read by GPU cull compute shader +struct VobGPUData { + XMFLOAT3 aabbCenter; + float pad0; + XMFLOAT3 aabbExtent; + float pad1; + XMFLOAT4X4 world; + XMFLOAT4X4 prevWorld; + DWORD color; + float aniModeStrength; + float canBeAffectedByPlayer; + UINT submeshStart; // index into SubmeshGPUData[] + UINT submeshCount; // how many submeshes this vob maps to + float minHeight; // BBox.Min.y for per-vob wind calculations + float maxHeight; // BBox.Max.y for per-vob wind calculations + UINT pad2; +}; + +// Per-submesh lookup, shared across all vobs with the same visual +struct SubmeshGPUData { + int slice; + float uStart, vStart, uEnd, vEnd; + UINT argIndex; // index into merged indirect args + UINT instanceBaseOffset; // fixed write offset in instance buffer + UINT globalSourceIndex; // global source index into feedback texture +}; + +// Per-submesh data for the world mesh atlas indirect draw path. +// Read by VS_ExWorldAtlas via StructuredBuffer. +struct WorldMeshSubmeshGPUData { + // Diffuse atlas + int diffuseSlice; + float dUStart, dVStart, dUEnd, dVEnd; + // Normal atlas + int normalSlice; + float nUStart, nVStart, nUEnd, nVEnd; + // FX atlas + int fxSlice; + float fUStart, fVStart, fUEnd, fVEnd; + // Flags: 1 = HAS_NORMAL, 2 = HAS_FX, 4 = ALPHA_TEST + UINT flags; +}; + +// Constant buffer for the GPU cull compute shader +struct CullConstants { + XMFLOAT4 frustumPlanes[6]; + XMFLOAT3 cameraPosition; + float drawDistance; + float globalWindStrength; + UINT windAdvanced; + UINT numVobs; + UINT feedbackFrameNumber; // >0 = write feedback in CS; 0 = disabled (e.g. 
shadow pass) + UINT enableHiZ; // 1 = Hi-Z occlusion culling enabled + UINT hiZMipCount; + float hiZWidth; // Hi-Z mip 0 dimensions (full depth buffer size) + float hiZHeight; + XMFLOAT4X4 viewProjection; // Current frame view-projection matrix for Hi-Z reprojection +}; + +struct HiZBuildConstants { + UINT outputWidth; + UINT outputHeight; + UINT inputMipLevel; + UINT isCopyPass; // 1 = copy from depth buffer (mip 0), 0 = downsample from previous mip }; #pragma pack (push, 1) diff --git a/D3D11Engine/D3D11AtlasTypes.h b/D3D11Engine/D3D11AtlasTypes.h new file mode 100644 index 00000000..05423800 --- /dev/null +++ b/D3D11Engine/D3D11AtlasTypes.h @@ -0,0 +1,31 @@ +#pragma once +#include "D3D11TextureAtlasManager.h" +#include "D3D11IndirectBuffer.h" +#include "ConstantBufferStructs.h" + +#include +#include +#include + +// Shared atlas constants +constexpr size_t TEXTURE_ATLAS_MAX = DXGI_FORMAT_V408 + 1; +struct MeshVisualInfo; + +// Tracks one unique submesh in the global geometry buffer +struct StaticSubmeshEntry { + UINT indexCount; + UINT startIndexLocation; // offset into global IB + int baseVertexLocation; // offset into global VB + TextureDescriptor atlasDesc; + MeshVisualInfo* visual; // which visual owns this submesh +}; + +// Groups all submeshes that share one atlas (same DXGI_FORMAT) +struct AtlasDrawGroup { + DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; + std::vector submeshes; + std::vector indirectArgs; + std::unique_ptr indirectBuffer; + UINT mergedArgsOffset = 0; // byte offset into merged indirect args buffer + UINT mergedArgsCount = 0; // number of args in this group +}; diff --git a/D3D11Engine/D3D11CascadedShadowMapBuffer.cpp b/D3D11Engine/D3D11CascadedShadowMapBuffer.cpp index 0139b414..ee7e9ccf 100644 --- a/D3D11Engine/D3D11CascadedShadowMapBuffer.cpp +++ b/D3D11Engine/D3D11CascadedShadowMapBuffer.cpp @@ -115,13 +115,13 @@ ID3D11ShaderResourceView* D3D11CascadedShadowMapBuffer::GetShaderResourceView() return m_srv.Get(); } -void 
D3D11CascadedShadowMapBuffer::BindToPixelShader( ID3D11DeviceContext1* context, UINT slot ) const { +void D3D11CascadedShadowMapBuffer::BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) const { if ( m_srv ) { context->PSSetShaderResources( slot, 1, m_srv.GetAddressOf() ); } } -void D3D11CascadedShadowMapBuffer::BindToVertexShader( ID3D11DeviceContext1* context, UINT slot ) const { +void D3D11CascadedShadowMapBuffer::BindToVertexShader( ID3D11DeviceContext* context, UINT slot ) const { if ( m_srv ) { context->VSSetShaderResources( slot, 1, m_srv.GetAddressOf() ); } diff --git a/D3D11Engine/D3D11CascadedShadowMapBuffer.h b/D3D11Engine/D3D11CascadedShadowMapBuffer.h index 694c205b..260c53e6 100644 --- a/D3D11Engine/D3D11CascadedShadowMapBuffer.h +++ b/D3D11Engine/D3D11CascadedShadowMapBuffer.h @@ -50,14 +50,14 @@ class D3D11CascadedShadowMapBuffer { * @param context Device context * @param slot Shader resource slot */ - void BindToPixelShader( ID3D11DeviceContext1* context, UINT slot ) const; + void BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) const; /** * Bind the texture array to a vertex shader slot. 
* @param context Device context * @param slot Shader resource slot */ - void BindToVertexShader( ID3D11DeviceContext1* context, UINT slot ) const; + void BindToVertexShader( ID3D11DeviceContext* context, UINT slot ) const; /** Get the size of each cascade (width = height) */ UINT GetSize() const { return m_size; } diff --git a/D3D11Engine/D3D11CommandList.cpp b/D3D11Engine/D3D11CommandList.cpp new file mode 100644 index 00000000..9fd631c0 --- /dev/null +++ b/D3D11Engine/D3D11CommandList.cpp @@ -0,0 +1,11 @@ +#include "pch.h" +#include "D3D11CommandList.h" +#include "D3D11VertexBuffer.h" + +void D3D11CommandList::IASetVertexBuffer( D3D11VertexBuffer* vb, UINT stride, UINT offset ) { + m_Context->IASetVertexBuffers( 0, 1, vb->GetVertexBuffer().GetAddressOf(), &stride, &offset ); +} + +void D3D11CommandList::IASetIndexBuffer( D3D11VertexBuffer* ib, DXGI_FORMAT format, UINT offset ) { + m_Context->IASetIndexBuffer( ib->GetVertexBuffer().Get(), format, offset ); +} diff --git a/D3D11Engine/D3D11CommandList.h b/D3D11Engine/D3D11CommandList.h new file mode 100644 index 00000000..41e4ee24 --- /dev/null +++ b/D3D11Engine/D3D11CommandList.h @@ -0,0 +1,140 @@ +#pragma once +#include "pch.h" +#include "D3D11PipelineStateObject.h" + +class D3D11VertexBuffer; + +/** + * Slim command-list wrapper around an ID3D11DeviceContext and the + * D3D11PipelineStateCache. + * + * Provides SetPipelineState() plus the commonly used Draw / IA / OM + * helpers so that call-sites read like a modern graphics API without + * touching the raw context or the engine's global render-state machine. + * + * The object is intentionally cheap to construct (two pointers) and + * does not own any resources. 
+ */ +struct D3D11CommandList { + + D3D11CommandList() = default; + D3D11CommandList( ID3D11DeviceContext* context, D3D11PipelineStateCache* cache ) + : m_Context( context ), m_Cache( cache ) {} + + // --- Pipeline state ------------------------------------------------------ + + void SetPipelineState( const D3D11PipelineStateObject& pso ) { + m_Cache->SetPipelineState( pso ); + } + + /** Force the cache to re-bind everything on next SetPipelineState. */ + void InvalidatePipelineState() { + m_Cache->Invalidate(); + } + + // --- Input assembly ------------------------------------------------------ + + void IASetVertexBuffer( D3D11VertexBuffer* vb, UINT stride, UINT offset = 0 ); + + void IASetVertexBuffers( UINT startSlot, + UINT numBuffers, + ID3D11Buffer* const* buffers, + const UINT* strides, + const UINT* offsets ) { + m_Context->IASetVertexBuffers( startSlot, numBuffers, buffers, strides, offsets ); + } + + void IASetIndexBuffer( ID3D11Buffer* buffer, DXGI_FORMAT format, UINT offset = 0 ) { + m_Context->IASetIndexBuffer( buffer, format, offset ); + } + + void IASetIndexBuffer( D3D11VertexBuffer* ib, DXGI_FORMAT format, UINT offset = 0 ); + + // --- Draw calls ---------------------------------------------------------- + + void Draw( UINT vertexCount, UINT startVertexLocation = 0 ) { + m_Context->Draw( vertexCount, startVertexLocation ); + m_DrawnTriangles += vertexCount / 3; + } + + void DrawIndexed( UINT indexCount, + UINT startIndexLocation = 0, + INT baseVertexLocation = 0 ) { + m_Context->DrawIndexed( indexCount, startIndexLocation, baseVertexLocation ); + m_DrawnTriangles += indexCount / 3; + } + + void DrawInstanced( UINT vertexCountPerInstance, + UINT instanceCount, + UINT startVertexLocation = 0, + UINT startInstanceLocation = 0 ) { + m_Context->DrawInstanced( vertexCountPerInstance, instanceCount, + startVertexLocation, startInstanceLocation ); + m_DrawnTriangles += ( vertexCountPerInstance / 3 ) * instanceCount; + } + + void DrawIndexedInstanced( UINT 
indexCountPerInstance, + UINT instanceCount, + UINT startIndexLocation = 0, + INT baseVertexLocation = 0, + UINT startInstanceLocation = 0 ) { + m_Context->DrawIndexedInstanced( indexCountPerInstance, instanceCount, + startIndexLocation, baseVertexLocation, + startInstanceLocation ); + m_DrawnTriangles += ( indexCountPerInstance / 3 ) * instanceCount; + } + + void DrawIndexedInstancedIndirect( ID3D11Buffer* argsBuffer, + UINT alignedByteOffsetForArgs ) { + m_Context->DrawIndexedInstancedIndirect( argsBuffer, alignedByteOffsetForArgs ); + // Triangle count unknown for indirect draws + } + + // --- Render target / viewport helpers ------------------------------------ + + void OMSetRenderTargets( UINT numViews, + ID3D11RenderTargetView* const* rtvs, + ID3D11DepthStencilView* dsv ) { + m_Context->OMSetRenderTargets( numViews, rtvs, dsv ); + } + + void RSSetViewports( UINT numViewports, const D3D11_VIEWPORT* viewports ) { + m_Context->RSSetViewports( numViewports, viewports ); + } + + void RSGetViewports( UINT* numViewports, D3D11_VIEWPORT* viewports ) { + m_Context->RSGetViewports( numViewports, viewports ); + } + + void ClearDepthStencilView( ID3D11DepthStencilView* dsv, + UINT clearFlags, + float depth, + UINT8 stencil ) { + m_Context->ClearDepthStencilView( dsv, clearFlags, depth, stencil ); + } + + void ClearRenderTargetView( ID3D11RenderTargetView* rtv, const float color[4] ) { + m_Context->ClearRenderTargetView( rtv, color ); + } + + // --- Stats --------------------------------------------------------------- + + /** Return triangles drawn since the last FlushDrawnTriangles() call and reset the counter. 
*/ + UINT FlushDrawnTriangles() { + UINT t = m_DrawnTriangles; + m_DrawnTriangles = 0; + return t; + } + + UINT GetDrawnTriangles() const { return m_DrawnTriangles; } + + // --- Raw access (escape hatch) ------------------------------------------- + + ID3D11DeviceContext* GetContext() const { return m_Context; } + D3D11PipelineStateCache* GetPSOCache() const { return m_Cache; } + +private: + ID3D11DeviceContext* m_Context = nullptr; + D3D11PipelineStateCache* m_Cache = nullptr; + UINT m_DrawnTriangles = 0; +}; diff --git a/D3D11Engine/D3D11Effect.cpp b/D3D11Engine/D3D11Effect.cpp index 3bfecfe3..fd0d6c16 100644 --- a/D3D11Engine/D3D11Effect.cpp +++ b/D3D11Engine/D3D11Effect.cpp @@ -219,15 +219,12 @@ XRESULT D3D11Effect::DrawRain() { // Set alphablending state.BlendState.SetAlphaBlending(); - state.BlendState.SetDirty(); // Disable depth-write state.DepthState.DepthWriteEnabled = false; - state.DepthState.SetDirty(); // Disable culling state.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - state.RasterizerState.SetDirty(); // Rendering instances only e->GetContext()->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP ); @@ -376,15 +373,12 @@ XRESULT D3D11Effect::DrawRain_CS() { // Set alphablending state.BlendState.SetAlphaBlending(); - state.BlendState.SetDirty(); // Disable depth-write state.DepthState.DepthWriteEnabled = false; - state.DepthState.SetDirty(); // Disable culling state.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - state.RasterizerState.SetDirty(); // Rendering instances only e->GetContext()->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP ); diff --git a/D3D11Engine/D3D11Engine.vcxproj b/D3D11Engine/D3D11Engine.vcxproj index 02df1227..ee9c019d 100644 --- a/D3D11Engine/D3D11Engine.vcxproj +++ b/D3D11Engine/D3D11Engine.vcxproj @@ -801,6 +801,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -819,6 +820,7 @@ copy "$(OutDir)$(TargetName).pdb" 
"$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -841,6 +843,8 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + + @@ -848,8 +852,11 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + + + @@ -993,6 +1000,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -1077,6 +1085,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -1099,6 +1108,8 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + + @@ -1107,6 +1118,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + ../pch.h @@ -1209,6 +1221,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -1298,6 +1311,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -1307,6 +1321,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + diff --git a/D3D11Engine/D3D11Engine.vcxproj.filters b/D3D11Engine/D3D11Engine.vcxproj.filters index f300b42c..218393df 100644 --- a/D3D11Engine/D3D11Engine.vcxproj.filters +++ b/D3D11Engine/D3D11Engine.vcxproj.filters @@ -383,6 +383,13 @@ ZenGin\Classes + + + Engine\D3D11 + + + Engine\D3D11 + Engine\D3D11\PFX\Effects @@ -839,6 +846,24 @@ ZenGin\Classes + + Engine + + + Engine + + + + + + Engine\D3D11 + + + Engine\D3D11 + + + Engine\D3D11 + @@ -869,6 +894,15 @@ Engine\D3D11 + + Engine\D3D11 + + + Engine\D3D11 + + + Engine\D3D11 + Engine\D3D11 @@ -1138,6 +1172,18 @@ ZenGin\Classes + + Engine + + + Engine + + + Engine\D3D11 + + + Engine\D3D11 + diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index 78e4e94c..78517c08 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -1,6 +1,9 @@ #include "D3D11GraphicsEngine.h" #include "D3D11ShadowMap.h" +#include "D3D11VobAtlasPass.h" +#include "D3D11MeshAtlasPass.h" + #include "AlignedAllocator.h" #include "D3D11Effect.h" #include "D3D11GShader.h" @@ -54,6 +57,7 @@ #include "zCOption.h" #include "RenderGraph.h" #include 
"RGBuilder.h" +#include "D3D11TextureAtlasManager.h" #ifdef BUILD_SPACER #define IS_SPACER_BUILD true @@ -97,6 +101,7 @@ static std::unique_ptr igdextDevice; static std::unique_ptr agsDevice; extern bool userHaveAMDGPU; +bool SupportTextureAtlases = false; namespace { @@ -165,6 +170,9 @@ D3D11GraphicsEngine::D3D11GraphicsEngine() { m_lowlatency = false; m_isWindowActive = false; + m_VobAtlasPass = std::make_unique( this ); + m_MeshAtlasPass = std::make_unique( this ); + // Initialize previous view-proj matrix to identity for motion vectors XMStoreFloat4x4( &m_PrevViewProjMatrix, XMMatrixIdentity() ); @@ -283,6 +291,7 @@ void D3D11GraphicsEngine::CreateAndBindDefaultSampler() { float scaleRatio = static_cast(GetScaledResolution().x) / static_cast(GetBackbufferResolution().x); // Calculate raw bias, but clamp it to a maximum of 0.0f to protect Supersampling float mipBias = std::min(0.0f, std::log2(scaleRatio)); + m_SamplerMipBias = mipBias; D3D11_SAMPLER_DESC samplerDesc{}; samplerDesc.Filter = D3D11_FILTER_ANISOTROPIC; @@ -593,6 +602,21 @@ XRESULT D3D11GraphicsEngine::Init() { Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.UseLayeredRendering = FeatureRTArrayIndexFromAnyShader; } + if (maxFeatureLevel >= D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0) { + // check amount of GPU Memory available + constexpr uint64_t GiB = 1024ull * 1024ull * 1024ull; + if ( adpDesc.DedicatedVideoMemory >= 3 * GiB ) { + // currently we just assume everything fits into memory. + // in the future we should make use of Tiled Resources, which would allow us + // to support more memory intensive features, even on less than 4GB cards, by streaming in the necessary tiles. + // but that's very expensive on the CPU and requires deferred loading of textures, and a lot of management. 
+ SupportTextureAtlases = true; + Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows = SupportTextureAtlases; + Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh = SupportTextureAtlases; + Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs = SupportTextureAtlases; + } + } + LogInfo() << "Creating ShaderManager"; ShaderManager = std::make_unique(); ShaderManager->Init(); @@ -773,6 +797,9 @@ XRESULT D3D11GraphicsEngine::Init() { D3D11VertexBuffer::EBindFlags::B_INDEXBUFFER, D3D11VertexBuffer::EUsageFlags::U_IMMUTABLE ); + // Initialize pipeline state cache + m_PipelineStateCache.Init( Device.Get(), Context.Get() ); + // Create shadow map manager ShadowMaps = std::make_unique(); int initialShadowSize = Engine::GAPI->GetRendererState().RendererSettings.ShadowMapSize; @@ -957,6 +984,9 @@ XRESULT D3D11GraphicsEngine::RecreateBuffers() { GetDevice().Get(), roundedTextureResolution.x, roundedTextureResolution.y, DXGI_FORMAT_R32_TYPELESS, nullptr, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT ); + // Create / recreate Hi-Z pyramid resources to match new depth buffer size + CreateHiZResources(); + // Create PFX-Renderer if ( !PfxRenderer ) PfxRenderer = std::make_unique(); @@ -1329,7 +1359,6 @@ XRESULT D3D11GraphicsEngine::OnBeginFrame() { // Disable culling for ui rendering(Sprite from LeGo needs it since it use CCW instead of CW order) SetDefaultStates(); rendererState.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - rendererState.RasterizerState.SetDirty(); UpdateRenderStates(); GetContext()->PSSetSamplers( 0, 1, ClampSamplerState.GetAddressOf() ); @@ -1886,10 +1915,8 @@ XRESULT D3D11GraphicsEngine::DrawScreenFade( void* c ) { // Default states SetDefaultStates(); Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); 
Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::CF_COMPARISON_ALWAYS; Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); SetActivePixelShader( PShaderID::PS_PFX_CinemaScope ); ActivePS->Apply(); @@ -1933,28 +1960,23 @@ XRESULT D3D11GraphicsEngine::DrawScreenFade( void* c ) { case zRND_ALPHA_FUNC_BLEND_TEST: case zRND_ALPHA_FUNC_SUB: { Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); break; } case zRND_ALPHA_FUNC_ADD: { Engine::GAPI->GetRendererState().BlendState.SetAdditiveBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); break; } case zRND_ALPHA_FUNC_MUL: { Engine::GAPI->GetRendererState().BlendState.SetModulateBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); break; } case zRND_ALPHA_FUNC_MUL2: { Engine::GAPI->GetRendererState().BlendState.SetModulate2Blending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); break; } } Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::CF_COMPARISON_ALWAYS; Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); if ( haveTexture ) SetActivePixelShader( PShaderID::PS_PFX_Alpha_Blend ); @@ -1981,7 +2003,6 @@ XRESULT D3D11GraphicsEngine::DrawScreenFade( void* c ) { // Disable culling for ui rendering(Sprite from LeGo needs it since it use CCW instead of CW order) SetDefaultStates(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); UpdateRenderStates(); } return XR_SUCCESS; @@ -2833,8 +2854,8 @@ XRESULT D3D11GraphicsEngine::UnbindTexture( int slot ) { /** Recreates the renderstates */ XRESULT D3D11GraphicsEngine::UpdateRenderStates() { - if ( 
Engine::GAPI->GetRendererState().BlendState.StateDirty && - Engine::GAPI->GetRendererState().BlendState.Hash != FFBlendStateHash ) { + Engine::GAPI->GetRendererState().BlendState.ComputeHash(); + if ( Engine::GAPI->GetRendererState().BlendState.Hash != FFBlendStateHash ) { D3D11BlendStateInfo* state = static_cast (GothicStateCache::s_BlendStateMap[Engine::GAPI->GetRendererState().BlendState]); @@ -2849,13 +2870,12 @@ XRESULT D3D11GraphicsEngine::UpdateRenderStates() { FFBlendState = state->State.Get(); FFBlendStateHash = Engine::GAPI->GetRendererState().BlendState.Hash; - Engine::GAPI->GetRendererState().BlendState.StateDirty = false; GetContext()->OMSetBlendState( FFBlendState.Get(), float4( 0, 0, 0, 0 ).toPtr(), 0xFFFFFFFF ); } - if ( Engine::GAPI->GetRendererState().RasterizerState.StateDirty && - Engine::GAPI->GetRendererState().RasterizerState.Hash != + Engine::GAPI->GetRendererState().RasterizerState.ComputeHash(); + if ( Engine::GAPI->GetRendererState().RasterizerState.Hash != FFRasterizerStateHash ) { D3D11RasterizerStateInfo* state = static_cast (GothicStateCache::s_RasterizerStateMap[Engine::GAPI->GetRendererState().RasterizerState]); @@ -2871,12 +2891,11 @@ XRESULT D3D11GraphicsEngine::UpdateRenderStates() { FFRasterizerState = state->State.Get(); FFRasterizerStateHash = Engine::GAPI->GetRendererState().RasterizerState.Hash; - Engine::GAPI->GetRendererState().RasterizerState.StateDirty = false; GetContext()->RSSetState( FFRasterizerState.Get() ); } - if ( Engine::GAPI->GetRendererState().DepthState.StateDirty && - Engine::GAPI->GetRendererState().DepthState.Hash != + Engine::GAPI->GetRendererState().DepthState.ComputeHash(); + if ( Engine::GAPI->GetRendererState().DepthState.Hash != FFDepthStencilStateHash ) { D3D11DepthBufferState* state = static_cast (GothicStateCache::s_DepthBufferMap[Engine::GAPI->GetRendererState().DepthState]); @@ -2892,7 +2911,6 @@ XRESULT D3D11GraphicsEngine::UpdateRenderStates() { FFDepthStencilState = state->State.Get(); 
FFDepthStencilStateHash = Engine::GAPI->GetRendererState().DepthState.Hash; - Engine::GAPI->GetRendererState().DepthState.StateDirty = false; GetContext()->OMSetDepthStencilState( FFDepthStencilState.Get(), 0 ); } @@ -2988,16 +3006,15 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { GetContext()->CSSetSamplers( 0, 1, DefaultSamplerState.GetAddressOf() ); // Update view distances - InfiniteRangeConstantBuffer->UpdateBuffer( float4( FLT_MAX, 0, 0, 0 ).toPtr() ); + InfiniteRangeConstantBuffer->UpdateBuffer( float4( FLT_MAX, m_SamplerMipBias, 0, 0 ).toPtr() ); OutdoorSmallVobsConstantBuffer->UpdateBuffer( float4( rendererState.RendererSettings.OutdoorSmallVobDrawRadius, - 0, 0, 0 ).toPtr() ); + m_SamplerMipBias, 0, 0 ).toPtr() ); OutdoorVobsConstantBuffer->UpdateBuffer( float4( rendererState.RendererSettings.OutdoorVobDrawRadius, - 0, 0, 0 ).toPtr() ); + m_SamplerMipBias, 0, 0 ).toPtr() ); rendererState.RasterizerState.FrontCounterClockwise = false; - rendererState.RasterizerState.SetDirty(); RGResourceHandle colorResource; graph.AddPass( L"Initialize Buffers", [&]( RGBuilder& builder, RenderPass& pass ) { @@ -3020,7 +3037,7 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { GetContext()->ClearRenderTargetView( graph.GetPhysicalTexture( colorResource )->GetRenderTargetView().Get(), reinterpret_cast(&fogColor) ); }; }); - + if ( rendererState.RendererSettings.DrawSky ) { graph.AddPass( L"Draw Sky", [&]( RGBuilder& builder, RenderPass& pass ) { //// Setup / Declare @@ -3028,13 +3045,13 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { //albedoTarget = builder.CreateTexture( albedoDesc ); builder.Write( colorResource ); - pass.m_executeCallback = [this, colorResource](const RenderGraph& graph)->void { + pass.m_executeCallback = [this, colorResource]( const RenderGraph& graph )->void { // Draw back of the sky if outdoor GetContext()->OMSetRenderTargets( 1, graph.GetPhysicalTexture( colorResource )->GetRenderTargetView().GetAddressOf(), nullptr ); - 
+ DrawSky(); }; - }); + } ); } RGResourceHandle normalsResource; @@ -3043,7 +3060,7 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { graph.AddPass( L"G-Buffer Pass", [&]( RGBuilder& builder, RenderPass& pass ) { // Setup / Declare auto size = GetResolution(); - normalsResource = builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R8G8B8A8_SNORM, L"GBufferNormals" }); + normalsResource = builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R16G16_SNORM, L"GBufferNormals" }); specularResource = builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R16G16_FLOAT, L"GBufferSpecular" }); reactiveMaskResource = builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R8_UNORM, L"ReactiveMask" }); @@ -3114,10 +3131,11 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { builder.Read( specularResource ); builder.Write( backBufferHandle ); - pass.m_executeCallback = [this, colorResource, normalsResource, specularResource](const RenderGraph& graph)-> void { + pass.m_executeCallback = [this, colorResource, normalsResource, specularResource, backBufferHandle](const RenderGraph& graph)-> void { auto colorTexture = graph.GetPhysicalTexture(colorResource); auto normalsTexture = graph.GetPhysicalTexture(normalsResource); auto specularTexture = graph.GetPhysicalTexture(specularResource); + auto backbuffer = graph.GetPhysicalTexture( backBufferHandle ); if ( Engine::GAPI->GetRendererState().RendererSettings.EnableShadows ) { // Cascades only get rendered if this is enabled. 
@@ -3130,13 +3148,15 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { *colorTexture, *normalsTexture, *specularTexture, - *GetDepthBufferCopy()); + *GetDepthBufferCopy(), + backbuffer->GetRenderTargetView().Get(), + GetDepthBuffer()->GetDepthStencilView().Get() ); if ( !Engine::GAPI->GetRendererState().RendererSettings.FixViewFrustum ) { m_FrameLights.clear(); } }; - }); - + }); + graph.AddPass( L"Draw Frame AlphaMeshes", [&]( RGBuilder& builder, RenderPass& pass ) { // Setup / Declare builder.Write( backBufferHandle ); @@ -3295,19 +3315,17 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { Engine::GAPI->GetLoadedWorldInfo()->BspTree->GetBspTreeMode() == zBSP_MODE_OUTDOOR) { graph.AddPass( L"Draw Godrays", [&]( RGBuilder& builder, RenderPass& pass ) { - builder.Read( normalsResource ); builder.Read( backBufferHandle ); builder.Write( backBufferHandle ); - pass.m_executeCallback = [this, backBufferHandle, normalsResource](const RenderGraph& graph) { + pass.m_executeCallback = [this, backBufferHandle](const RenderGraph& graph) { // Unbind temporary backbuffer copy Microsoft::WRL::ComPtr srv; GetContext()->PSSetShaderResources( 5, 1, srv.GetAddressOf() ); auto backbufferResource = graph.GetPhysicalTexture(backBufferHandle); - auto normalsTexture = graph.GetPhysicalTexture(normalsResource); - PfxRenderer->RenderGodRays(backbufferResource->GetShaderResView().Get(), normalsTexture->GetShaderResView().Get()); + PfxRenderer->RenderGodRays(backbufferResource->GetShaderResView().Get(), GetDepthBufferCopy()->GetShaderResView().Get()); // Godrays bind a different sampler GetContext()->PSSetSamplers( 0, 1, DefaultSamplerState.GetAddressOf() ); }; @@ -3671,7 +3689,6 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { // Disable culling for ui rendering(Sprite from LeGo needs it since it use CCW instead of CW order) SetDefaultStates(); rendererState.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - 
rendererState.RasterizerState.SetDirty(); UpdateRenderStates(); GetContext()->PSSetSamplers( 0, 1, ClampSamplerState.GetAddressOf() ); @@ -3795,7 +3812,6 @@ XRESULT D3D11GraphicsEngine::DrawMeshInfoListAlphablended( // Setup renderstates Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); Engine::GAPI->SetViewTransformXM( view ); @@ -3861,10 +3877,8 @@ XRESULT D3D11GraphicsEngine::DrawMeshInfoListAlphablended( if ( alphaFunc == zMAT_ALPHA_FUNC_ADD ) Engine::GAPI->GetRendererState().BlendState.SetAdditiveBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); lastAlphaFunc = alphaFunc; @@ -3886,9 +3900,7 @@ XRESULT D3D11GraphicsEngine::DrawMeshInfoListAlphablended( } Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = true; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = false; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); UpdateRenderStates(); @@ -3909,13 +3921,6 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh_Indirect( bool noTextures ) { if ( !Engine::GAPI->GetRendererState().RendererSettings.DrawWorldMesh ) return XR_SUCCESS; - struct MDI_DrawArgs - { - unsigned int DrawCount; - unsigned int AlignedByteOffsetForArgs; - MaterialInfo* MeshMaterialInfo; - }; - // Setup default renderstates SetDefaultStates(); @@ -3923,6 +3928,18 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh_Indirect( bool noTextures ) { Engine::GAPI->SetViewTransformXM( view ); Engine::GAPI->ResetWorldTransform(); + // Draw atlas path first (handles opaque + alpha-test submeshes that were atlased) + if ( 
Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh ) { + m_MeshAtlasPass->Draw(); + } + + struct MDI_DrawArgs + { + unsigned int DrawCount; + unsigned int AlignedByteOffsetForArgs; + MaterialInfo* MeshMaterialInfo; + }; + SetActivePixelShader( PShaderID::PS_Diffuse ); SetActiveVertexShader( VShaderID::VS_Ex ); @@ -3972,6 +3989,10 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh_Indirect( bool noTextures ) { for ( auto const& renderItem : renderList ) { for ( auto const& worldMesh : renderItem->WorldMeshes ) { + // Skip submeshes already drawn by the atlas path + if ( m_MeshAtlasPass->IsSubmeshAtlased( worldMesh.second ) ) + continue; + zCTexture* aniTex = worldMesh.first.Material->GetTexture(); if ( !aniTex ) continue; @@ -4182,6 +4203,10 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh( bool noTextures ) { Engine::GAPI->SetViewTransformXM( view ); Engine::GAPI->ResetWorldTransform(); + if ( Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh ) { + m_MeshAtlasPass->Draw(); + } + SetActivePixelShader( PShaderID::PS_Diffuse ); SetActiveVertexShader( VShaderID::VS_Ex ); @@ -4227,6 +4252,10 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh( bool noTextures ) { for ( auto const& renderItem : renderList ) { for ( auto const& worldMesh : renderItem->WorldMeshes ) { + // Skip submeshes already drawn by the atlas path + if ( m_MeshAtlasPass->IsSubmeshAtlased( worldMesh.second ) ) + continue; + if ( worldMesh.first.Material ) { zCTexture* aniTex = worldMesh.first.Material->GetTexture(); if ( !aniTex ) continue; @@ -4380,7 +4409,6 @@ void D3D11GraphicsEngine::DrawWaterSurfaces() { // Setup render states for z-prepass Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = false; // Rasterization is faster without writes - Engine::GAPI->GetRendererState().BlendState.SetDirty(); UpdateRenderStates(); // Bind vertex water shader @@ -4411,10 +4439,8 @@ void 
D3D11GraphicsEngine::DrawWaterSurfaces() { // Disable depth writes after z-prepass Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = true; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; // Rasterization is faster without writes - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); // Bind pixel water shader @@ -4478,11 +4504,9 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAround( cullFront ? GothicRasterizerStateInfo::CM_CULL_FRONT : GothicRasterizerStateInfo::CM_CULL_NONE; Engine::GAPI->GetRendererState().RasterizerState.DepthClipEnable = true; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.SetDefault(); Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::ECompareFunc::CF_COMPARISON_LESS_EQUAL; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); Context->PSSetShaderResources( 0, 6, s_nullSRVs ); @@ -4793,11 +4817,9 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAround_Layered( cullFront ? 
GothicRasterizerStateInfo::CM_CULL_FRONT : GothicRasterizerStateInfo::CM_CULL_NONE; Engine::GAPI->GetRendererState().RasterizerState.DepthClipEnable = true; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.SetDefault(); Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::ECompareFunc::CF_COMPARISON_LESS_EQUAL; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); Context->PSSetShaderResources( 0, 6, s_nullSRVs ); @@ -5103,10 +5125,6 @@ void D3D11GraphicsEngine::ShadowPass_DrawWorldMesh_Indirect(const std::vectorGetRendererState().GraphicsState.FF_AlphaRef; bool linearDepth = (Engine::GAPI->GetRendererState().GraphicsState.FF_GSwitches & GSWITCH_LINEAR_DEPTH) != 0; - - auto drawMultiIndexedInstancedIndirect = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.UseMDI - ? DrawMultiIndexedInstancedIndirect - : Stub_DrawMultiIndexedInstancedIndirect; if ( Engine::GAPI->GetRendererState().RendererSettings.FastShadows ) { @@ -5181,7 +5199,7 @@ void D3D11GraphicsEngine::ShadowPass_DrawWorldMesh_Indirect(const std::vector(opaqueDrawArgs.size()), WorldMeshIndirectBuffer->GetIndirectBuffer().Get(), 0, @@ -5318,11 +5336,9 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p GothicRasterizerStateInfo::CM_CULL_NONE; Engine::GAPI->GetRendererState().RasterizerState.DepthClipEnable = true; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.SetDefault(); Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::ECompareFunc::CF_COMPARISON_LESS_EQUAL; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); @@ -5330,7 +5346,7 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p Engine::GAPI->SetViewTransformXM( view ); // Set shader - 
SetActivePixelShader( PShaderID::PS_DiffuseAlphaTestShadows ); + SetActivePixelShader( PShaderID::PS_DiffuseAlphaTest ); auto defaultPS = ActivePS; SetActiveVertexShader( VShaderID::VS_Ex ); @@ -5402,241 +5418,354 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p } if ( Engine::GAPI->GetRendererState().RendererSettings.DrawVOBs ) { - static std::vector potentialCasters; - std::vector& vobs = potentialCasters; - if (params.CascadeIndex != -1) { - auto renderQueue = ShadowMaps->GetRenderQueue( params.CascadeIndex ); - renderQueue->ProcessQueue(); - - vobs = renderQueue->GetVobs(); - } else { - static std::vector _1; - static std::vector _2; - potentialCasters.reserve(1024); - potentialCasters.clear(); - - LegacyRenderQueueProxy q(potentialCasters, _1, _2); - RndCullContext ctx; - ctx.queue = &q; - ctx.cameraPosition = Engine::GAPI->GetCameraPosition(); - ctx.stage = RenderStage::STAGE_DRAW_WORLD; - ctx.frustum = currentFrustum; - ctx.drawDistances.OutdoorVobs = Engine::GAPI->GetRendererState().RendererSettings.OutdoorVobDrawRadius; - ctx.drawDistances.OutdoorVobsSmall = Engine::GAPI->GetRendererState().RendererSettings.OutdoorSmallVobDrawRadius; - ctx.drawDistances.IndoorVobs = Engine::GAPI->GetRendererState().RendererSettings.IndoorVobDrawRadius; - ctx.drawDistances.VisualFX = Engine::GAPI->GetRendererState().RendererSettings.VisualFXDrawRadius; - Engine::GAPI->CollectVisibleVobs( ctx ); - } - - // clear any residue of main render pass - for ( auto const& staticMeshVisual : Engine::GAPI->GetStaticMeshVisuals() ) { - staticMeshVisual.second->StartNewFrame(); - } - for ( auto& it : vobs) { - // process any vobs only visible in this cascade - VobInstanceInfo vii = {}; - vii.world = it->WorldMatrix; - vii.prevWorld = it->HasValidPrevMatrix ? 
it->PrevWorldMatrix : it->WorldMatrix; - vii.color = it->GroundColor; - vii.windStrenth = 0.0f; - vii.canBeAffectedByPlayer = 0; - - zTAnimationMode aniMode = it->Vob->GetVisualAniMode(); - if ( aniMode != zVISUAL_ANIMODE_NONE ) { - vii.canBeAffectedByPlayer = (!it->Vob->GetDynColl() ? 1.0f : 0.0f); - GothicAPI::ProcessVobAnimation( it->Vob, aniMode, vii ); - } + bool drawStaticVobs = true; + if ( Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows && m_VobAtlasPass->IsReady() ) { + + SetActivePixelShader( PShaderID::PS_DiffuseAtlasAlphaTestShadows ); + defaultPS = ActivePS; + + // GPU indirect path: reuse the VOB atlas pass with the cascade/shadow frustum. + // BC1 groups render depth-only (no PS); BC2 groups use the alpha-test PS. + m_VobAtlasPass->Draw( currentFrustum, /*bindPS=*/false ); + drawStaticVobs = false; + + SetActivePixelShader( PShaderID::PS_DiffuseAlphaTestShadows ); + defaultPS = ActivePS; + } + + static std::vector dynamicVobCasters; + static std::vector _1; + static std::vector _2; + dynamicVobCasters.reserve( 1024 ); + dynamicVobCasters.clear(); + + LegacyRenderQueueProxy q( dynamicVobCasters, _1, _2 ); + RndCullContext ctx; + ctx.queue = &q; + ctx.cameraPosition = Engine::GAPI->GetCameraPosition(); + ctx.stage = RenderStage::STAGE_DRAW_SHADOWS; + ctx.frustum = currentFrustum; + ctx.drawDistances.OutdoorVobs = Engine::GAPI->GetRendererState().RendererSettings.OutdoorVobDrawRadius; + ctx.drawDistances.OutdoorVobsSmall = Engine::GAPI->GetRendererState().RendererSettings.OutdoorSmallVobDrawRadius; + ctx.drawDistances.IndoorVobs = 0; + ctx.drawDistances.VisualFX = 0; + Engine::GAPI->CollectVisibleVobs( ctx, (EBspTreeCollectFlags)(EBspTreeCollectFlags::COLLECT_DYNAMIC_VOBS) ); + + struct BatchableStaticVobs { + MeshVisualInfo* VisualInfo; + std::vector Instances; + uint32_t StartInstanceNum; + }; - reinterpret_cast(it->VisualInfo)->Instances.push_back( vii ); - } + if (drawStaticVobs) { + // clear any residue of main render pass + 
const auto& vobs = m_StaticVobs; + std::vector outRenderQueue{}; + VobCulling::CullAndGatherStaticVOBs( m_StaticVobsAABBs, vobs, currentFrustum.GetPlanes()._Elems, outRenderQueue ); + + std::sort( outRenderQueue.begin(), outRenderQueue.end(), + []( const StaticVobRenderItem& a, const StaticVobRenderItem& b ) { + return a.mvi->Visual < b.mvi->Visual; // order by visual (a against b) so equal visuals are adjacent for the batching loop below + } ); + + // Group vobs by visual and prepare instance data + std::vector batchables; + batchables.reserve( outRenderQueue.size() ); + + zCVisual* lastVisual = nullptr; + for ( auto& itm : outRenderQueue ) { + auto v = vobs[itm.instanceIndex]; + + if ( v->VisualInfo->Visual != lastVisual ) { + // New visual, reset instance data + lastVisual = v->VisualInfo->Visual; + batchables.push_back( { reinterpret_cast(v->VisualInfo), std::vector() } ); + batchables.back().Instances.reserve( 10 ); + } + BatchableStaticVobs& batch = batchables.back(); + + MeshVisualInfo* visualInfo = batch.VisualInfo; + VobInstanceInfo vii = {}; + vii.world = v->WorldMatrix; + vii.prevWorld = v->HasValidPrevMatrix ? v->PrevWorldMatrix : v->WorldMatrix; + vii.color = v->GroundColor; + vii.windStrenth = 0.0f; + vii.canBeAffectedByPlayer = 0; + zTAnimationMode aniMode = v->Vob->GetVisualAniMode(); + if ( aniMode != zVISUAL_ANIMODE_NONE ) { + vii.canBeAffectedByPlayer = (!v->Vob->GetDynColl() ?
1.0f : 0.0f); + GothicAPI::ProcessVobAnimation( v->Vob, aniMode, vii ); + } + batch.Instances.push_back( vii ); + } - auto _ = START_TIMING( timer_labels_vobs[timerLabelIndex] ); - auto _1 = RecordGraphicsEvent( L"Shadows::DrawVOBs" ); + auto _ = START_TIMING( timer_labels_vobs[timerLabelIndex] ); + auto _1 = RecordGraphicsEvent( L"DrawVOBs" ); - size_t ByteWidth = DynamicInstancingBuffer->GetSizeInBytes(); + size_t ByteWidth = DynamicInstancingBuffer->GetSizeInBytes(); - if ( ByteWidth < sizeof( VobInstanceInfo ) * vobs.size() ) { - if ( Engine::GAPI->GetRendererState().RendererSettings.EnableDebugLog ) - LogInfo() << "Instancing buffer too small (" << ByteWidth - << "), need " << sizeof( VobInstanceInfo ) * vobs.size() - << " bytes. Recreating buffer."; + if ( ByteWidth < sizeof( VobInstanceInfo ) * vobs.size() ) { + if ( Engine::GAPI->GetRendererState().RendererSettings.EnableDebugLog ) + LogInfo() << "Instancing buffer too small (" << ByteWidth + << "), need " << sizeof( VobInstanceInfo ) * vobs.size() + << " bytes. 
Recreating buffer."; - // Buffer too small, recreate it - DynamicInstancingBuffer->Init( - nullptr, sizeof( VobInstanceInfo ) * vobs.size(), - D3D11VertexBuffer::B_VERTEXBUFFER, D3D11VertexBuffer::U_DYNAMIC, - D3D11VertexBuffer::CA_WRITE ); + // Buffer too small, recreate it + DynamicInstancingBuffer->Init( + nullptr, sizeof( VobInstanceInfo ) * vobs.size(), + D3D11VertexBuffer::B_VERTEXBUFFER, D3D11VertexBuffer::U_DYNAMIC, + D3D11VertexBuffer::CA_WRITE ); - SetDebugName( DynamicInstancingBuffer->GetShaderResourceView().Get(), "DynamicInstancingBuffer->ShaderResourceView" ); - SetDebugName( DynamicInstancingBuffer->GetVertexBuffer().Get(), "DynamicInstancingBuffer->VertexBuffer" ); - } - - std::vector activeVisuals; - activeVisuals.reserve(256); // Reserve enough memory to avoid allocations - for ( auto const& pair : Engine::GAPI->GetStaticMeshVisuals() ) { - if ( !pair.second->Instances.empty() ) { - activeVisuals.push_back(pair.second); + SetDebugName( DynamicInstancingBuffer->GetShaderResourceView().Get(), "DynamicInstancingBuffer->ShaderResourceView" ); + SetDebugName( DynamicInstancingBuffer->GetVertexBuffer().Get(), "DynamicInstancingBuffer->VertexBuffer" ); } - } - - byte* data; - UINT size; - if ( SUCCEEDED( DynamicInstancingBuffer->Map( D3D11VertexBuffer::M_WRITE_DISCARD, - reinterpret_cast(&data), &size ) ) ) { + + byte* data; + UINT size; UINT loc = 0; - for ( auto const& staticMeshVisual : activeVisuals ) { - staticMeshVisual->StartInstanceNum = loc; - memcpy( data + loc * sizeof( VobInstanceInfo ), staticMeshVisual->Instances.data(), - sizeof( VobInstanceInfo ) * staticMeshVisual->Instances.size() ); - loc += staticMeshVisual->Instances.size(); + if ( !SUCCEEDED( DynamicInstancingBuffer->Map( D3D11VertexBuffer::M_WRITE_DISCARD, + reinterpret_cast(&data), &size ) ) ) { + LogError() << "Failed to map dynamic instancing buffer for writing!"; + return; + } + for ( auto& staticMeshVisual : batchables ) { + staticMeshVisual.StartInstanceNum = loc; + memcpy( 
data + loc * sizeof( VobInstanceInfo ), staticMeshVisual.Instances.data(), + sizeof( VobInstanceInfo ) * staticMeshVisual.Instances.size() ); + loc += staticMeshVisual.Instances.size(); } DynamicInstancingBuffer->Unmap(); - } else { - LogError() << "Failed to map dynamic instancing buffer for vobs."; - } - // Apply instancing shader - SetActiveVertexShader( VShaderID::VS_ExInstancedObj ); - // SetActivePixelShader("PS_DiffuseAlphaTest"); - ActiveVS->Apply(); + // Apply instancing shader + SetActiveVertexShader( VShaderID::VS_ExInstancedObj ); + // SetActivePixelShader("PS_DiffuseAlphaTest"); + ActiveVS->Apply(); - if ( !linearDepth ) // Only unbind when not rendering linear depth - { - // Unbind PS - Context->PSSetShader( nullptr, nullptr, 0 ); - } + if ( !linearDepth ) // Only unbind when not rendering linear depth + { + // Unbind PS + Context->PSSetShader( nullptr, nullptr, 0 ); + } - GraphicsShaderConstantBuffer windBuffer = {}; - if ( ActiveVS ) { - windBuffer = ActiveVS->GetBuffer( "WindParams" ); - windBuffer.Bind(); - } + GraphicsShaderConstantBuffer windParamsCB = {}; + if ( ActiveVS ) { + windParamsCB = ActiveVS->GetBuffer( "WindParams" ).Bind(); + } - XMFLOAT3 vPlayerPosition = Engine::GAPI->GetPlayerVob() ? Engine::GAPI->GetPlayerVob()->GetPositionWorld() : XMFLOAT3( 0, 0, 0 ); - g_windBuffer.playerPos = float3( vPlayerPosition.x, vPlayerPosition.y, vPlayerPosition.z ); + XMFLOAT3 vPlayerPosition = Engine::GAPI->GetPlayerVob() ? 
Engine::GAPI->GetPlayerVob()->GetPositionWorld() : XMFLOAT3( 0, 0, 0 ); + g_windBuffer.playerPos = float3( vPlayerPosition.x, vPlayerPosition.y, vPlayerPosition.z ); - UINT dynOffset[] = { 0 }; - UINT dynuStride[] = { sizeof( VobInstanceInfo ) }; + // Draw all vobs the player currently sees + for ( auto const& b : batchables ) { + if ( b.Instances.empty() ) continue; + auto staticMeshVisual = b.VisualInfo; - ID3D11Buffer* buffers[1] = { - DynamicInstancingBuffer->GetVertexBuffer().Get() - }; + g_windBuffer.minHeight = staticMeshVisual->BBox.Min.y; + g_windBuffer.maxHeight = staticMeshVisual->BBox.Max.y; - GetContext()->IASetVertexBuffers( 1, 1, buffers, dynuStride, dynOffset ); + if ( ActiveVS ) { + windParamsCB.Update( &g_windBuffer ); + } - // Sort visuals by whether they need alpha testing to minimize shader switches - if ( alphaRef > 0.0f ) { - std::sort( activeVisuals.begin(), activeVisuals.end(), [alphaRef, colorWritesEnabled]( const MeshVisualInfo* a, const MeshVisualInfo* b ) { - return a->NeedsAlphaTesting < b->NeedsAlphaTesting || (a->NeedsAlphaTesting == b->NeedsAlphaTesting && a->Visual < b->Visual); - } ); - } else { - std::sort( activeVisuals.begin(), activeVisuals.end(), [alphaRef, colorWritesEnabled]( const MeshVisualInfo* a, const MeshVisualInfo* b ) { - return a->Visual < b->Visual; - } ); - } + zCTexture* previousTx = nullptr; + for ( auto const& itt : staticMeshVisual->MeshesByTexture ) { + std::vector& mlist = staticMeshVisual->MeshesByTexture[itt.first]; + if ( mlist.empty() ) continue; + + zCTexture* tx = itt.first.Texture; + bool bindTexture = previousTx != tx + && tx + && (tx->HasAlphaChannel() || colorWritesEnabled); + + // Check for alphablend + bool blendAdd = + itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_ADD; + bool blendBlend = + itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_BLEND; + // if one part of the mesh uses blending, all do, which means that + // the mesh likely is transparent and can't cast shadows + if ( 
blendAdd || blendBlend ) { + continue; + } - // Draw all vobs the player currently sees - D3D11PShader* currPs = nullptr; + for ( unsigned int i = 0; i < mlist.size(); i++ ) { + // Bind texture + if ( bindTexture ) { + if ( alphaRef > 0.0f && tx->CacheIn( 0.6f ) == zRES_CACHED_IN ) { + tx->Bind( 0 ); + ActivePS->Apply(); + previousTx = tx; + } else + continue; + } else { + if ( !linearDepth ) // Only unbind when not rendering linear depth + { + // Unbind PS + Context->PSSetShader( nullptr, nullptr, 0 ); + } + } - for ( auto const& staticMeshVisual : activeVisuals ) { - if ( staticMeshVisual->Instances.empty() ) continue; - - g_windBuffer.minHeight = staticMeshVisual->BBox.Min.y; - g_windBuffer.maxHeight = staticMeshVisual->BBox.Max.y; + MeshInfo* mi = mlist[i]; - windBuffer.Update( &g_windBuffer ); + // Draw batch + DrawInstanced( mi->MeshVertexBuffer, mi->MeshIndexBuffer, + mi->Indices.size(), DynamicInstancingBuffer.get(), + sizeof( VobInstanceInfo ), b.Instances.size(), + sizeof( ExVertexStruct ), b.StartInstanceNum ); - bool doReset = true; - zCTexture* previousTx = nullptr; - for ( auto const& itt : staticMeshVisual->MeshesByTexture ) { - std::vector& mlist = staticMeshVisual->MeshesByTexture[itt.first]; - if ( mlist.empty() ) continue; - - // Check for alphablend - bool blendAdd = - itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_ADD; - bool blendBlend = - itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_BLEND; - // if one part of the mesh uses blending, all do, which means that - // the mesh likely is transparent and can't cast shadows - if ( !doReset || blendAdd || blendBlend ) { - doReset = false; - continue; + Engine::GAPI->GetRendererState().RendererInfo.FrameDrawnVobs += + b.Instances.size(); + } } + } + } // end if ( drawStaticVobs ): CPU path used when the GPU indirect atlas path did not draw the static vobs - zCTexture* tx = itt.first.Texture; - bool bindTexture = previousTx != tx - && tx - && (tx->HasAlphaChannel() || colorWritesEnabled) - && alphaRef > 0.0f; + // Draw dynamic vobs (spawned at runtime, not part
of m_StaticVobs or atlas) + if ( !dynamicVobCasters.empty() ) { + // Group by visual for instanced drawing + + PShaderID alphaTestShader = (GetRenderingStage() == DES_SHADOWMAP) || (GetRenderingStage() == DES_SHADOWMAP_CUBE) + ? PShaderID::PS_DiffuseAtlasAlphaTestShadows + : PShaderID::PS_DiffuseAtlasAlphaTest; - // Bind texture - if ( bindTexture ) { - if ( tx->CacheIn( 0.6f ) == zRES_CACHED_IN ) { - auto t = tx->GetSurface()->GetEngineTexture()->GetShaderResourceView().Get(); - Context->PSSetShaderResources( 0, 1, &t ); - auto nextPs = ActivePS.get(); - if ( currPs != nextPs ) { - currPs = nextPs; - ActivePS->Apply(); - } - previousTx = tx; - } else - continue; - } else { - if ( !linearDepth ) // Only unbind when not rendering linear depth - { - // Unbind PS - if ( currPs != nullptr ) { - Context->PSSetShader( nullptr, nullptr, 0 ); - currPs = nullptr; - } + std::vector dynBatches; + std::unordered_map batchIndex; + batchIndex.reserve( dynamicVobCasters.size() ); // usually single, but can be multiple + + for ( auto* v : dynamicVobCasters ) { + if ( !v->VisualInfo ) continue; + MeshVisualInfo* vi = reinterpret_cast( v->VisualInfo ); + + auto [it, inserted] = batchIndex.emplace( vi, dynBatches.size() ); + if ( inserted ) { + dynBatches.push_back( { vi, {}, 0 } ); + } + + VobInstanceInfo vii = {}; + vii.world = v->WorldMatrix; + vii.prevWorld = v->HasValidPrevMatrix ? v->PrevWorldMatrix : v->WorldMatrix; + vii.color = v->GroundColor; + vii.windStrenth = 0.0f; + vii.canBeAffectedByPlayer = 0; + zTAnimationMode aniMode = v->Vob->GetVisualAniMode(); + if ( aniMode != zVISUAL_ANIMODE_NONE ) { + vii.canBeAffectedByPlayer = (!v->Vob->GetDynColl() ? 
1.0f : 0.0f); + GothicAPI::ProcessVobAnimation( v->Vob, aniMode, vii ); + } + dynBatches[it->second].Instances.push_back( vii ); + } + + if ( !dynBatches.empty() ) { + // Ensure instancing buffer is large enough + size_t needed = dynamicVobCasters.size() * sizeof( VobInstanceInfo ); + if ( DynamicInstancingBuffer->GetSizeInBytes() < needed ) { + DynamicInstancingBuffer->Init( + nullptr, needed, + D3D11VertexBuffer::B_VERTEXBUFFER, D3D11VertexBuffer::U_DYNAMIC, + D3D11VertexBuffer::CA_WRITE ); + } + + byte* dynData; + UINT dynSize; + UINT dynLoc = 0; + if ( SUCCEEDED( DynamicInstancingBuffer->Map( D3D11VertexBuffer::M_WRITE_DISCARD, + reinterpret_cast(&dynData), &dynSize ) ) ) { + for ( auto& batch : dynBatches ) { + batch.StartInstanceNum = dynLoc; + memcpy( dynData + dynLoc * sizeof( VobInstanceInfo ), batch.Instances.data(), + sizeof( VobInstanceInfo ) * batch.Instances.size() ); + dynLoc += batch.Instances.size(); } + DynamicInstancingBuffer->Unmap(); } - for ( unsigned int i = 0; i < mlist.size(); i++ ) { + // Set up instanced vertex shader (GPU indirect path may have changed shader state) + SetActiveVertexShader( VShaderID::VS_ExInstancedObj ); + ActiveVS->Apply(); + + if ( linearDepth ) { + SetActivePixelShader( PShaderID::PS_LinDepth ); + } else { + SetActivePixelShader( alphaTestShader ); + Context->PSSetShader( nullptr, nullptr, 0 ); + } - MeshInfo* mi = mlist[i]; + // Rebind PS constant buffers (GPU indirect path may have overwritten them) + ActivePS->GetBuffer(0).Update(&Engine::GAPI->GetRendererState().GraphicsState ).Bind(); - // Draw batch + GSky* dynSky = Engine::GAPI->GetSky(); + ActivePS->GetBuffer(1).Update(&dynSky->GetAtmosphereCB() ).Bind(); + InfiniteRangeConstantBuffer->BindToPixelShader( 3 ); - /* Dont re-bind buffer all the time*/ - const auto vb = mi->MeshVertexBuffer; - const auto ib = mi->MeshIndexBuffer; + SetupVS_ExConstantBuffer(); - UINT offset[] = { 0 }; - UINT uStride[] = { sizeof( ExVertexStruct ) }; - ID3D11Buffer* buffers[1] = { 
- vb->GetVertexBuffer().Get() - }; - - auto numIndices = mi->Indices.size(); - const auto numInstances = staticMeshVisual->Instances.size(); - const auto startInstanceNum = staticMeshVisual->StartInstanceNum; - const auto indexOffset = 0; + GraphicsShaderConstantBuffer windParamsCB = {}; + if ( ActiveVS ) { + windParamsCB = ActiveVS->GetBuffer(1).Bind(); + } - GetContext()->IASetVertexBuffers( 0, 1, buffers, uStride, offset ); + XMFLOAT3 vPlayerPosition = Engine::GAPI->GetPlayerVob() ? Engine::GAPI->GetPlayerVob()->GetPositionWorld() : XMFLOAT3( 0, 0, 0 ); + g_windBuffer.playerPos = float3( vPlayerPosition.x, vPlayerPosition.y, vPlayerPosition.z ); - Context->IASetIndexBuffer( ib->GetVertexBuffer().Get(), VERTEX_INDEX_DXGI_FORMAT, 0 ); + bool hasPS = false; + for ( auto const& batch : dynBatches ) { + if ( batch.Instances.empty() ) continue; + + g_windBuffer.minHeight = batch.VisualInfo->BBox.Min.y; + g_windBuffer.maxHeight = batch.VisualInfo->BBox.Max.y; + + if ( ActiveVS ) { + windParamsCB.Update( &g_windBuffer ); + } + + zCTexture* previousTx = nullptr; + for ( auto const& itt : batch.VisualInfo->MeshesByTexture ) { + const std::vector& mlist = itt.second; + if ( mlist.empty() ) continue; + + zCTexture* tx = itt.first.Texture; + bool bindTexture = previousTx != tx + && tx + && (tx->HasAlphaChannel() || colorWritesEnabled); + + bool blendAdd = itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_ADD; + bool blendBlend = itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_BLEND; + if ( blendAdd || blendBlend ) { + continue; // shadow pass, transparent materials shouldn't cast shadows + } - unsigned int max = - Engine::GAPI->GetRendererState().RendererSettings.MaxNumFaces * 3; - numIndices = max != 0 ? (numIndices < max ? 
numIndices : max) : numIndices; + for ( unsigned int i = 0; i < mlist.size(); i++ ) { + if ( bindTexture ) { + if ( alphaRef > 0.0f && tx->CacheIn( 0.6f ) == zRES_CACHED_IN ) { + tx->Bind( 0 ); + if (!hasPS) { + ActivePS->Apply(); + hasPS = true; + } + previousTx = tx; + } else + continue; + } else { + if ( !linearDepth && hasPS ) { + Context->PSSetShader( nullptr, nullptr, 0 ); + hasPS = false; + } + } - // Draw the batch - GetContext()->DrawIndexedInstanced( numIndices, numInstances, indexOffset, 0, - startInstanceNum ); + MeshInfo* mi = mlist[i]; - Engine::GAPI->GetRendererState().RendererInfo.FrameDrawnTriangles += - (numIndices / 3) * numInstances; + DrawInstanced( mi->MeshVertexBuffer, mi->MeshIndexBuffer, + mi->Indices.size(), DynamicInstancingBuffer.get(), + sizeof( VobInstanceInfo ), batch.Instances.size(), + sizeof( ExVertexStruct ), batch.StartInstanceNum ); - Engine::GAPI->GetRendererState().RendererInfo.FrameDrawnVobs++; + Engine::GAPI->GetRendererState().RendererInfo.FrameDrawnVobs += + batch.Instances.size(); + } + } } } - - // Reset visual - if ( doReset ) staticMeshVisual->StartNewFrame(); } } @@ -5687,7 +5816,6 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p } Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = true; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); } /** Update morph mesh visual */ @@ -5775,7 +5903,20 @@ XRESULT D3D11GraphicsEngine::DrawVOBsInstanced() { { auto _ = START_TIMING( "VOBs" ); - SetDefaultStates(); + + bool needsDrawVobs = true; + if ( m_VobAtlasPass->IsReady() ) { + Frustum cameraFrustum = Frustum::AlwaysContainingFrustum(); + if ( auto cam = zCCamera::GetCamera() ) { + cam->Activate(); + cameraFrustum.BuildPerspective( + XMMatrixTranspose( XMLoadFloat4x4( &cam->trafoView ) ), + XMLoadFloat4x4( &cam->trafoProjection ) ); + } + m_VobAtlasPass->Draw( cameraFrustum ); + needsDrawVobs = false; + } + SetActivePixelShader( PShaderID::PS_Diffuse ); 
SetActiveVertexShader( VShaderID::VS_ExInstancedObj ); @@ -5824,7 +5965,12 @@ XRESULT D3D11GraphicsEngine::DrawVOBsInstanced() { if ( !renderSettings.FixViewFrustum || (renderSettings.FixViewFrustum && vobs.empty()) ) { - Engine::GAPI->CollectVisibleVobs( vobs, m_FrameLights, mobs ); + + EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_MUTATE; + if (!needsDrawVobs) { + collectFlags = (EBspTreeCollectFlags)(collectFlags & ~EBspTreeCollectFlags::COLLECT_VOBS); + } + Engine::GAPI->CollectVisibleVobs( vobs, m_FrameLights, mobs, CullAll, collectFlags ); } } @@ -5832,15 +5978,15 @@ XRESULT D3D11GraphicsEngine::DrawVOBsInstanced() { UpdateMorphMeshVisual(); } - if ( renderSettings.DrawVOBs ) { + if ( renderSettings.DrawVOBs && vobs.size() > 0 ) { auto _1 = Engine::GraphicsEngine->RecordGraphicsEvent( L"DrawVOBsInstanced->DrawVOBs" ); - + std::vector activeVisuals; activeVisuals.reserve( 256 ); // Reserve enough memory to avoid allocations for ( auto const& pair : Engine::GAPI->GetStaticMeshVisuals() ) { if ( !pair.second->Instances.empty() ) { activeVisuals.push_back( pair.second ); - } + } } // Create instancebuffer for this frame @@ -6175,10 +6321,8 @@ XRESULT D3D11GraphicsEngine::DrawFrameAlphaMeshes() else if ( blendBlend ) Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); } @@ -6234,7 +6378,6 @@ XRESULT D3D11GraphicsEngine::DrawPolyStrips( bool noTextures ) { // Setup renderstates Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); Engine::GAPI->SetViewTransformXM( view ); @@ -6302,10 +6445,7 @@ XRESULT D3D11GraphicsEngine::DrawPolyStrips( bool noTextures ) { else if ( 
blendBlend ) Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); - Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); } @@ -6336,10 +6476,6 @@ void D3D11GraphicsEngine::SetDefaultStates( bool force ) { Engine::GAPI->GetRendererState().BlendState.SetDefault(); Engine::GAPI->GetRendererState().DepthState.SetDefault(); - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); - Engine::GAPI->GetRendererState().DepthState.SetDirty(); - if ( force ) { FFRasterizerStateHash = 0; FFBlendStateHash = 0; @@ -6348,6 +6484,12 @@ void D3D11GraphicsEngine::SetDefaultStates( bool force ) { } } +void D3D11GraphicsEngine::InvalidateStateCache() { + FFRasterizerStateHash = 0; + FFBlendStateHash = 0; + FFDepthStencilStateHash = 0; +} + /** Draws the sky using the GSky-Object */ XRESULT D3D11GraphicsEngine::DrawSky() { GSky* sky = Engine::GAPI->GetSky(); @@ -6355,7 +6497,6 @@ XRESULT D3D11GraphicsEngine::DrawSky() { if ( !Engine::GAPI->GetRendererState().RendererSettings.AtmosphericScattering ) { Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); #if defined(BUILD_GOTHIC_1_08k) && !defined(BUILD_1_12F) @@ -6423,11 +6564,8 @@ XRESULT D3D11GraphicsEngine::DrawSky() { Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; Engine::GAPI->GetRendererState().RasterizerState.SetDefault(); - Engine::GAPI->GetRendererState().DepthState.SetDirty(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_BACK; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); SetupVS_ExMeshDrawCall(); SetupVS_ExConstantBuffer(); @@ -6449,7 +6587,6 @@ XRESULT 
D3D11GraphicsEngine::DrawSky() { { SetDefaultStates(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); // Draw barrier after sky @@ -6573,7 +6710,6 @@ void D3D11GraphicsEngine::DrawVobSingle( VobInfo* vob, zCCamera& camera ) { // Set backface culling Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_BACK; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); GetContext()->PSSetSamplers( 0, 1, DefaultSamplerState.GetAddressOf() ); SetActivePixelShader( PShaderID::PS_Preview_Textured ); @@ -6608,7 +6744,6 @@ void D3D11GraphicsEngine::DrawVobSingle( VobInfo* vob, zCCamera& camera ) { // Disable culling again Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); GetContext()->PSSetSamplers( 0, 1, ClampSamplerState.GetAddressOf() ); } @@ -6853,7 +6988,6 @@ void D3D11GraphicsEngine::DrawDecalList( const std::vector& decals, auto _ = RecordGraphicsEvent(L"DrawDecalList"); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); Engine::GAPI->SetViewTransformXM( view ); // Update view transform @@ -6862,7 +6996,6 @@ void D3D11GraphicsEngine::DrawDecalList( const std::vector& decals, if ( !lighting ) { SetActivePixelShader( PShaderID::PS_Transparency ); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); } else { SetActivePixelShader( PShaderID::PS_World ); } @@ -6936,7 +7069,6 @@ void D3D11GraphicsEngine::DrawDecalList( const std::vector& decals, } if ( lastAlphaFunc != alphaFunc ) { - Engine::GAPI->GetRendererState().BlendState.SetDirty(); UpdateRenderStates(); lastAlphaFunc = 
alphaFunc; } @@ -7010,7 +7142,6 @@ void D3D11GraphicsEngine::DrawQuadMarks() { Engine::GAPI->SetViewTransformXM( view ); // Update view transform Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); ActivePS->GetBuffer( "FFPipelineConstantBuffer" ) .Update( &Engine::GAPI->GetRendererState().GraphicsState ) @@ -7064,7 +7195,6 @@ void D3D11GraphicsEngine::DrawQuadMarks() { alphaFunc = mat->GetAlphaFunc(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); UpdateRenderStates(); } @@ -7090,9 +7220,7 @@ void D3D11GraphicsEngine::DrawMQuadMarks() { Engine::GAPI->SetViewTransformXM( view ); // Update view transform Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); SetupVS_ExMeshDrawCall(); SetupVS_ExConstantBuffer(); @@ -7122,7 +7250,6 @@ void D3D11GraphicsEngine::DrawMQuadMarks() { alphaFunc = mat->GetAlphaFunc(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); UpdateRenderStates(); } @@ -7194,7 +7321,6 @@ void D3D11GraphicsEngine::DrawFrameParticleMeshes( std::unordered_mapGetRendererState(); state.DepthState.DepthWriteEnabled = false; - state.DepthState.SetDirty(); XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); Engine::GAPI->SetViewTransformXM( view ); @@ -7233,19 +7359,15 @@ void D3D11GraphicsEngine::DrawFrameParticleMeshes( std::unordered_mapGetRendererState(); state.BlendState.SetAdditiveBlending(); - state.BlendState.SetDirty(); state.DepthState.DepthWriteEnabled = false; - state.DepthState.SetDirty(); state.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - state.RasterizerState.SetDirty(); std::vector*>> pvecAdd; std::vector*>> pvecRest; @@ -7407,7 +7526,6 @@ void 
D3D11GraphicsEngine::DrawFrameParticles( if ( partInfo.BlendMode != lastBlendMode ) { // Setup blend state state.BlendState = blendState; - state.BlendState.SetDirty(); lastBlendMode = partInfo.BlendMode; UpdateRenderStates(); @@ -7421,7 +7539,6 @@ void D3D11GraphicsEngine::DrawFrameParticles( Context->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); state.BlendState.SetDefault(); - state.BlendState.SetDirty(); bufferParticleColor->BindToPixelShader( Context.Get(), 1 ); bufferParticleDistortion->BindToPixelShader( Context.Get(), 2 ); @@ -7447,6 +7564,17 @@ void D3D11GraphicsEngine::DrawFrameParticles( XRESULT D3D11GraphicsEngine::OnVobRemovedFromWorld( zCVob* vob ) { if ( Engine::ImGuiHandle ) Engine::ImGuiHandle->OnVobRemovedFromWorld( vob ); + // Remove from atlas GPU-culling buffer so it stops drawing + if ( m_VobAtlasPass ) m_VobAtlasPass->OnVobRemovedFromWorld( vob ); + + for ( int32_t i = 0; i < m_StaticVobs.size(); ++i ) { + // remove from static vob cache aswell, by moving to the end and resizing the vector, since order doesn't matter in the cache + if ( m_StaticVobs[i] && m_StaticVobs[i]->Vob == vob ) { + m_StaticVobs[i] = nullptr; + break; + } + } + // Take out of shadowupdate queue for ( auto&& it = FrameShadowUpdateLights.begin(); it != FrameShadowUpdateLights.end(); ++it ) { if ( (*it)->Vob == vob ) { @@ -7468,17 +7596,14 @@ void D3D11GraphicsEngine::UpdateOcclusion() { // Set up states Engine::GAPI->GetRendererState().RasterizerState.SetDefault(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().BlendState.SetDefault(); Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = false; // Rasterization is faster without writes - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.SetDefault(); 
Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; // Don't write the bsp-nodes to the depth buffer, also quicker - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); @@ -7700,7 +7825,6 @@ void D3D11GraphicsEngine::DrawString( const std::string& str, float x, float y, Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::CF_COMPARISON_ALWAYS; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); @@ -7756,7 +7880,6 @@ void D3D11GraphicsEngine::DrawString( const std::string& str, float x, float y, DrawVertexBuffer( TempVertexBuffer.get(), vertices.size(), sizeof( ExVertexStruct ) ); oldDepthState.ApplyTo( Engine::GAPI->GetRendererState().DepthState ); - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); @@ -7784,6 +7907,268 @@ void D3D11GraphicsEngine::StorePrevViewProjMatrix() { } } +void D3D11GraphicsEngine::CreateHiZResources() { + auto* device = GetDevice().Get(); + HRESULT hr; + + // Get depth buffer dimensions + UINT width = DepthStencilBuffer->GetSizeX(); + UINT height = DepthStencilBuffer->GetSizeY(); + if ( width == 0 || height == 0 ) + return; + + // Calculate mip count for full mip chain + UINT mipCount = 1; + { + UINT w = width, h = height; + while ( w > 1 || h > 1 ) { + w = (std::max)( w / 2, 1u ); + h = (std::max)( h / 2, 1u ); + mipCount++; + } + } + m_HiZMipCount = mipCount; + + // Create Hi-Z texture: full mip chain, SRV-bindable (used as CS input via SRV) + D3D11_TEXTURE2D_DESC hiZDesc = {}; + hiZDesc.Width = width; + hiZDesc.Height = height; + hiZDesc.MipLevels = mipCount; + hiZDesc.ArraySize = 1; + hiZDesc.Format = DXGI_FORMAT_R32_FLOAT; + hiZDesc.SampleDesc.Count = 1; + hiZDesc.Usage = D3D11_USAGE_DEFAULT; + hiZDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + hr = device->CreateTexture2D( &hiZDesc, nullptr, 
m_HiZTexture.ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Hi-Z] Failed to create Hi-Z texture"; + return; + } + + // SRV for the full Hi-Z texture (all mips, used for occlusion testing in CS_CullVobs) + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_R32_FLOAT; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = mipCount; + srvDesc.Texture2D.MostDetailedMip = 0; + + hr = device->CreateShaderResourceView( m_HiZTexture.Get(), &srvDesc, m_HiZSRV.ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Hi-Z] Failed to create Hi-Z SRV"; + m_HiZTexture.Reset(); + return; + } + + // Create scratch texture: single mip, UAV-bindable (CS writes here, then we copy to Hi-Z) + D3D11_TEXTURE2D_DESC scratchDesc = {}; + scratchDesc.Width = width; + scratchDesc.Height = height; + scratchDesc.MipLevels = 1; + scratchDesc.ArraySize = 1; + scratchDesc.Format = DXGI_FORMAT_R32_FLOAT; + scratchDesc.SampleDesc.Count = 1; + scratchDesc.Usage = D3D11_USAGE_DEFAULT; + scratchDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; + + hr = device->CreateTexture2D( &scratchDesc, nullptr, m_HiZScratch.ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Hi-Z] Failed to create scratch texture"; + m_HiZTexture.Reset(); + m_HiZSRV.Reset(); + return; + } + + // Scratch UAV + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = DXGI_FORMAT_R32_FLOAT; + uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; + uavDesc.Texture2D.MipSlice = 0; + + hr = device->CreateUnorderedAccessView( m_HiZScratch.Get(), &uavDesc, m_HiZScratchUAV.ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Hi-Z] Failed to create scratch UAV"; + m_HiZTexture.Reset(); + m_HiZSRV.Reset(); + m_HiZScratch.Reset(); + return; + } + + // Scratch SRV (not strictly needed, but useful for debugging) + D3D11_SHADER_RESOURCE_VIEW_DESC scratchSRVDesc = {}; + 
scratchSRVDesc.Format = DXGI_FORMAT_R32_FLOAT; + scratchSRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + scratchSRVDesc.Texture2D.MipLevels = 1; + scratchSRVDesc.Texture2D.MostDetailedMip = 0; + + hr = device->CreateShaderResourceView( m_HiZScratch.Get(), &scratchSRVDesc, m_HiZScratchSRV.ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Hi-Z] Failed to create scratch SRV"; + m_HiZTexture.Reset(); + m_HiZSRV.Reset(); + m_HiZScratch.Reset(); + m_HiZScratchUAV.Reset(); + return; + } + + LogInfo() << "[Hi-Z] Created Hi-Z pyramid resources: " << width << "x" << height + << ", " << mipCount << " mip levels"; +} + +void D3D11GraphicsEngine::BuildHiZPyramid() { + if ( !m_HiZTexture || !m_HiZScratch || m_HiZMipCount == 0 ) + return; + + auto hiZCS = ShaderManager->GetCShader( CShaderID::CS_BuildHiZ ); + if ( !hiZCS ) + return; + + auto& context = GetContext(); + + UINT width = DepthStencilBuffer->GetSizeX(); + UINT height = DepthStencilBuffer->GetSizeY(); + + hiZCS->Apply(); + + auto hiZCb = hiZCS->GetBuffer(0).Bind(); + + for ( UINT mip = 0; mip < m_HiZMipCount; mip++ ) { + UINT mipWidth = (std::max)( width >> mip, 1u ); + UINT mipHeight = (std::max)( height >> mip, 1u ); + + // Update constant buffer + HiZBuildConstants cb = {}; + cb.outputWidth = mipWidth; + cb.outputHeight = mipHeight; + cb.inputMipLevel = ( mip > 0 ) ? ( mip - 1 ) : 0; + cb.isCopyPass = ( mip == 0 ) ? 
1 : 0; + hiZCb.Update( &cb ); + + // Bind input SRV: + // Mip 0: read from depth buffer copy (avoids DSV/SRV hazard) + // Mip N: read from Hi-Z texture SRV (previous mip levels already filled) + ID3D11ShaderResourceView* inputSRV = nullptr; + if ( mip == 0 ) { + inputSRV = DepthStencilBufferCopy->GetShaderResView().Get(); + } else { + inputSRV = m_HiZSRV.Get(); + } + context->CSSetShaderResources( 0, 1, &inputSRV ); + + // Bind output UAV: always the scratch texture + ID3D11UnorderedAccessView* uav = m_HiZScratchUAV.Get(); + context->CSSetUnorderedAccessViews( 0, 1, &uav, nullptr ); + + // Dispatch + UINT groupsX = ( mipWidth + 7 ) / 8; + UINT groupsY = ( mipHeight + 7 ) / 8; + context->Dispatch( groupsX, groupsY, 1 ); + + // Unbind SRV and UAV to allow the copy + ID3D11ShaderResourceView* nullSRV = nullptr; + ID3D11UnorderedAccessView* nullUAV = nullptr; + context->CSSetShaderResources( 0, 1, &nullSRV ); + context->CSSetUnorderedAccessViews( 0, 1, &nullUAV, nullptr ); + + // Copy scratch (mip 0) -> Hi-Z texture (mip N) + D3D11_BOX srcBox = {}; + srcBox.left = 0; + srcBox.top = 0; + srcBox.right = mipWidth; + srcBox.bottom = mipHeight; + srcBox.front = 0; + srcBox.back = 1; + + context->CopySubresourceRegion( + m_HiZTexture.Get(), + D3D11CalcSubresource( mip, 0, m_HiZMipCount ), + 0, 0, 0, + m_HiZScratch.Get(), + 0, + &srcBox ); + } + + // Clean up CS state + context->CSSetShader( nullptr, nullptr, 0 ); +} + +void D3D11GraphicsEngine::CacheWorldStaticVobs() { + + static std::vector _1; + static std::vector _2; + m_StaticVobs.clear(); + m_StaticVobs.reserve( 1024 ); + + LegacyRenderQueueProxy q( m_StaticVobs, _1, _2 ); + RndCullContext ctx; + ctx.queue = &q; + ctx.cameraPosition = XMFLOAT3( 0, 0, 0 ); + ctx.stage = RenderStage::STAGE_DRAW_WORLD; + ctx.frustum = Frustum::AlwaysContainingFrustum(); + ctx.drawDistances.OutdoorVobs = 1'000'000; + ctx.drawDistances.OutdoorVobsSmall = ctx.drawDistances.OutdoorVobs; + ctx.drawDistances.IndoorVobs = 
ctx.drawDistances.OutdoorVobs; + ctx.drawDistances.VisualFX = 0; + Engine::GAPI->CollectVisibleVobs( ctx, (EBspTreeCollectFlags)(COLLECT_VOBS | COLLECT_INDOOR_VOBS | COLLECT_DISABLE_CHECK_DIST) ); + + const size_t totalItems = m_StaticVobs.size(); + // Correct math to calculate exact number of batches (rounds up to nearest multiple of 8, AVX ;) ) + const size_t numBatches = (totalItems + 7) / 8; + m_StaticVobsAABBs.clear(); + m_StaticVobsAABBs.reserve( numBatches ); + + for ( size_t i = 0; i < numBatches; ++i ) { + AABB_SoA_Batch8 b = {}; // Zero-initialize the batch + + // Fill the 8 slots in this batch + for ( int j = 0; j < 8; ++j ) { + size_t vobIdx = (i * 8) + j; + + if ( vobIdx < totalItems ) { + // Valid item: Extract and store + DirectX::BoundingBox bb = Frustum::Frustum::BBoxFromzTBBox3D( m_StaticVobs[vobIdx]->Vob->GetBBox() ); + + b.cx[j] = bb.Center.x; + b.cy[j] = bb.Center.y; + b.cz[j] = bb.Center.z; + + b.ex[j] = bb.Extents.x; + b.ey[j] = bb.Extents.y; + b.ez[j] = bb.Extents.z; + } else { + // Out of bounds (tail padding): + // Insert a dummy AABB far outside the map so it is guaranteed to be culled. + // This prevents invalid indices from entering your RenderQueue! 
+ b.cx[j] = 1000000.0f; + b.cy[j] = 1000000.0f; + b.cz[j] = 1000000.0f; + + b.ex[j] = 0.0f; + b.ey[j] = 0.0f; + b.ez[j] = 0.0f; + } + } + + m_StaticVobsAABBs.push_back( b ); + } +} + +void D3D11GraphicsEngine::OnWorldLoaded() +{ + CacheWorldStaticVobs(); + + // --- Build VOB texture atlases: collect unique textures, create Texture2DArray atlases --- + m_VobAtlasPass->Build(); + + // --- Build world mesh atlas: collect textures, build atlases, merge geometry --- + m_MeshAtlasPass->Build(); + + if ( Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh + || Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs ) { + Engine::GAPI->ReloadTextures(); + } +} + void D3D11GraphicsEngine::StoreVobPreviousTransforms() { if ( !zCCamera::GetCamera() ) { return; // only do this if we actually are in-game diff --git a/D3D11Engine/D3D11GraphicsEngine.h b/D3D11Engine/D3D11GraphicsEngine.h index 33405f66..fd9be714 100644 --- a/D3D11Engine/D3D11GraphicsEngine.h +++ b/D3D11Engine/D3D11GraphicsEngine.h @@ -4,6 +4,11 @@ #include "GothicAPI.h" #include "D3D11ShadowMap.h" #include "D3D11ShaderManager.h" +#include "D3D11PipelineStateObject.h" +#include "D3D11IndirectBuffer.h" +#include "VobCulling.h" +#include "D3D11VobAtlasPass.h" +#include "D3D11MeshAtlasPass.h" struct RenderToDepthStencilBuffer; @@ -55,6 +60,8 @@ struct AlphaMeshData { }; class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { + friend class D3D11VobAtlasPass; + friend class D3D11MeshAtlasPass; public: D3D11GraphicsEngine(); ~D3D11GraphicsEngine() override; @@ -189,6 +196,11 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { /** Sets up the default rendering state */ void SetDefaultStates( bool force = false ); + /** Invalidates the cached FF state hashes, forcing the next UpdateRenderStates() + * to re-apply all states to D3D11. Call after any code that sets D3D11 states + * directly (e.g. ImGui, external libraries). 
*/ + void InvalidateStateCache(); + /** Returns the current resolution (Maybe supersampled)*/ INT2 GetResolution() override { return m_scaledResolution; }; @@ -345,20 +357,29 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { D3D11PfxRenderer* GetPfxRenderer() const { return PfxRenderer.get(); } D3D11Texture* GetDistortionTexture() const { return DistortionTexture.get(); } + /** Returns the pipeline state cache for optimal D3D11 state management */ + D3D11PipelineStateCache& GetPipelineStateCache() { return m_PipelineStateCache; } + RenderToTextureBuffer* GetVelocityBuffer() const { return VelocityBuffer.get(); } const XMFLOAT4X4& GetPrevViewProjMatrix() const { return m_PrevViewProjMatrix; } - void StorePrevViewProjMatrix(); - auto GetClampSamplerState() -> auto { return ClampSamplerState.Get(); } auto GetCubeSamplerState() -> auto { return CubeSamplerState.Get(); } auto GetLinearSamplerState() -> auto { return LinearSamplerState.Get(); } D3D11ShadowMap* GetShadowMaps() const { return ShadowMaps.get(); } + void OnWorldLoaded() override; protected: void StoreVobPreviousTransforms(); + void StorePrevViewProjMatrix(); + + void CacheWorldStaticVobs(); + + /** Pipeline state cache for minimizing redundant D3D11 state transitions */ + D3D11PipelineStateCache m_PipelineStateCache; + std::unique_ptr m_FrameLimiter; int m_LastFrameLimit; @@ -459,6 +480,7 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { /** If true, we will save a screenshot after the next frame */ bool SaveScreenshotNextFrame; + float m_SamplerMipBias = 0.0f; bool m_flipWithTearing; bool m_swapchainflip; bool m_lowlatency; @@ -473,4 +495,24 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { INT2 NewResolution; void CreateAndBindDefaultSampler(); + + std::vector m_StaticVobs{}; + std::vector m_StaticVobsAABBs{}; + + /** Atlas rendering passes */ + std::unique_ptr m_VobAtlasPass; + std::unique_ptr m_MeshAtlasPass; + + /** Hi-Z occlusion culling resources */ + 
Microsoft::WRL::ComPtr m_HiZTexture; // Full mip-chain, SRV-only + Microsoft::WRL::ComPtr m_HiZSRV; + Microsoft::WRL::ComPtr m_HiZScratch; // Single-mip scratch for CS UAV writes + Microsoft::WRL::ComPtr m_HiZScratchUAV; + Microsoft::WRL::ComPtr m_HiZScratchSRV; + UINT m_HiZMipCount = 0; + + /** Create Hi-Z pyramid resources (called after depth buffer creation) */ + void CreateHiZResources(); + /** Build the Hi-Z mip chain from the current depth buffer */ + void BuildHiZPyramid(); }; diff --git a/D3D11Engine/D3D11GraphicsEngineBase.cpp b/D3D11Engine/D3D11GraphicsEngineBase.cpp index 2beb8819..dca9838c 100644 --- a/D3D11Engine/D3D11GraphicsEngineBase.cpp +++ b/D3D11Engine/D3D11GraphicsEngineBase.cpp @@ -167,11 +167,6 @@ void D3D11GraphicsEngineBase::SetDefaultStates() { Engine::GAPI->GetRendererState().DepthState.SetDefault(); Engine::GAPI->GetRendererState().SamplerState.SetDefault(); - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); - Engine::GAPI->GetRendererState().DepthState.SetDirty(); - Engine::GAPI->GetRendererState().SamplerState.SetDirty(); - GetContext()->PSSetSamplers( 0, 1, DefaultSamplerState.GetAddressOf() ); UpdateRenderStates(); diff --git a/D3D11Engine/D3D11IndirectBuffer.cpp b/D3D11Engine/D3D11IndirectBuffer.cpp index c1a20c69..a2ef7eb5 100644 --- a/D3D11Engine/D3D11IndirectBuffer.cpp +++ b/D3D11Engine/D3D11IndirectBuffer.cpp @@ -83,10 +83,13 @@ XRESULT D3D11IndirectBuffer::UpdateBuffer( void* data, UINT size ) { if ( SizeInBytes < size ) { size = SizeInBytes; } + if ( !data ) { + return XR_SUCCESS; + } // Assume null-copy? 
if ( XR_SUCCESS == Map( EMapFlags::M_WRITE_DISCARD, &mappedData, &bsize ) ) { if ( size ) { - bsize = size; + bsize = std::min( bsize, size ); } // Copy data memcpy( mappedData, data, bsize ); diff --git a/D3D11Engine/D3D11LegacyDeferredShading.cpp b/D3D11Engine/D3D11LegacyDeferredShading.cpp index a2dea70e..e274094e 100644 --- a/D3D11Engine/D3D11LegacyDeferredShading.cpp +++ b/D3D11Engine/D3D11LegacyDeferredShading.cpp @@ -37,13 +37,10 @@ XRESULT D3D11LegacyDeferredShading::DrawPointlightLights( if ( settings.LimitLightIntesity ) { Engine::GAPI->GetRendererState().BlendState.BlendOp = GothicBlendStateInfo::BO_BLEND_OP_MAX; } - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_BACK; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); graphicsEngine->SetupVS_ExMeshDrawCall(); graphicsEngine->SetupVS_ExConstantBuffer(); @@ -124,16 +121,12 @@ XRESULT D3D11LegacyDeferredShading::DrawPointlightLights( if ( Engine::GAPI->GetRendererState().DepthState.DepthBufferEnabled ) { Engine::GAPI->GetRendererState().DepthState.DepthBufferEnabled = false; Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_FRONT; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); graphicsEngine->UpdateRenderStates(); } } else { if ( !Engine::GAPI->GetRendererState().DepthState.DepthBufferEnabled ) { Engine::GAPI->GetRendererState().DepthState.DepthBufferEnabled = true; Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_BACK; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); graphicsEngine->UpdateRenderStates(); } } diff --git 
a/D3D11Engine/D3D11LineRenderer.cpp b/D3D11Engine/D3D11LineRenderer.cpp index d9c4ec9f..6008bcea 100644 --- a/D3D11Engine/D3D11LineRenderer.cpp +++ b/D3D11Engine/D3D11LineRenderer.cpp @@ -65,7 +65,6 @@ XRESULT D3D11LineRenderer::Flush() { engine->SetDefaultStates(); Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); engine->SetupVS_ExMeshDrawCall(); engine->SetupVS_ExConstantBuffer(); @@ -112,7 +111,6 @@ XRESULT D3D11LineRenderer::FlushScreenSpace() { engine->SetDefaultStates(); Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); engine->SetupVS_ExMeshDrawCall(); engine->GetContext()->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_LINELIST ); diff --git a/D3D11Engine/D3D11MeshAtlasPass.cpp b/D3D11Engine/D3D11MeshAtlasPass.cpp new file mode 100644 index 00000000..460234b9 --- /dev/null +++ b/D3D11Engine/D3D11MeshAtlasPass.cpp @@ -0,0 +1,491 @@ +#include "D3D11MeshAtlasPass.h" +#include "D3D11GraphicsEngine.h" + +#include "D3D11ShaderManager.h" +#include "D3D11VShader.h" +#include "D3D11PShader.h" +#include "D3D11ConstantBuffer.h" +#include "GothicAPI.h" +#include "GSky.h" +#include "RenderToTextureBuffer.h" +#include "WorldObjects.h" +#include "VertexTypes.h" +#include "zCTexture.h" +#include "zCMaterial.h" + +#include +#include + +// ----- globals defined in D3D11GraphicsEngine.cpp ----- +extern bool SupportTextureAtlases; +namespace { + constexpr DXGI_FORMAT VERTEX_INDEX_DXGI_FORMAT = sizeof( VERTEX_INDEX ) == sizeof( unsigned short ) ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; +} + +typedef void( __cdecl* PFN_DRAWMULTIINDEXEDINSTANCEDINDIRECT )( + ID3D11DeviceContext* context, unsigned int drawCount, + ID3D11Buffer* buffer, unsigned int alignedByteOffsetForArgs, + unsigned int alignedByteStrideForArgs ); +extern PFN_DRAWMULTIINDEXEDINSTANCEDINDIRECT DrawMultiIndexedInstancedIndirect; + +// ------------------------------------------------------- + +D3D11MeshAtlasPass::D3D11MeshAtlasPass( D3D11GraphicsEngine* engine ) + : m_Engine( engine ) { +} + +// ============================================================ +// Build – entry point called from OnWorldLoaded +// ============================================================ +void D3D11MeshAtlasPass::Build() { + // Reset everything + for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ ) { + m_WorldMeshDiffuseAtlasses[(DXGI_FORMAT)i].Destroy(); + m_WorldMeshNormalAtlasses[(DXGI_FORMAT)i].Destroy(); + m_WorldMeshFxAtlasses[(DXGI_FORMAT)i].Destroy(); + } + m_WorldMeshDiffuseAtlasLookup.clear(); + m_WorldMeshNormalAtlasLookup.clear(); + m_WorldMeshFxAtlasLookup.clear(); + m_WorldMeshAtlasDrawGroups.clear(); + m_WorldMeshAtlasedSubmeshes.clear(); + m_WorldMeshGlobalVertexBuffer.reset(); + m_WorldMeshGlobalIndexBuffer.reset(); + m_WorldMeshGlobalInstanceIdBuffer.reset(); + m_WorldMeshSubmeshBuffer.reset(); + + if ( !SupportTextureAtlases || + !Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh ) { + return; + } + + BuildTextureAtlasses(); + + if ( m_WorldMeshDiffuseAtlasLookup.empty() ) + return; + + BuildGeometryBuffers(); +} + +// ============================================================ +// BuildTextureAtlasses +// ============================================================ +void D3D11MeshAtlasPass::BuildTextureAtlasses() { + struct DiffuseTextureInfo { + zCTexture* gothicTexture; + DXGI_FORMAT Format; + Microsoft::WRL::ComPtr Texture2D; + }; + struct AuxTextureInfo { + D3D11Texture* engineTexture; + 
DXGI_FORMAT Format; + Microsoft::WRL::ComPtr Texture2D; + }; + + std::unordered_set seenDiffuse; + std::unordered_set seenNormal, seenFx; + std::vector uniqueDiffuse; + std::vector uniqueNormals, uniqueFx; + + auto& worldSections = Engine::GAPI->GetWorldSections(); + for ( auto& [x, row] : worldSections ) { + for ( auto& [y, section] : row ) { + for ( auto const& [meshKey, worldMeshInfo] : section.WorldMeshes ) { + if ( !meshKey.Material ) continue; + + // Skip animated textures + zCTexture* baseTex = meshKey.Material->GetTextureSingle(); + if ( !baseTex ) continue; + unsigned char texFlags = *reinterpret_cast( + reinterpret_cast(baseTex) + GothicMemoryLocations::zCTexture::Offset_Flags ); + if ( texFlags & GothicMemoryLocations::zCTexture::Mask_FlagIsAnimated ) + continue; + + // Only opaque + alpha-test + int alphaFunc = meshKey.Material->GetAlphaFunc(); + if ( alphaFunc > zMAT_ALPHA_FUNC_NONE && alphaFunc != zMAT_ALPHA_FUNC_TEST ) + continue; + + // Skip non-standard materials (water, portals, etc.) 
+ if ( meshKey.Info && meshKey.Info->MaterialType != MaterialInfo::MT_None ) + continue; + + zCTexture* tex = baseTex; + auto cachedState = tex->CacheIn( -1 ); + if ( cachedState != zRES_CACHED_IN ) continue; + + auto surface = tex->GetSurface(); + if ( !surface || !surface->IsSurfaceReady() ) continue; + + auto engineTex = surface->GetEngineTexture(); + if ( !engineTex ) continue; + + // Diffuse + if ( seenDiffuse.insert( tex ).second ) { + D3D11_TEXTURE2D_DESC desc; + engineTex->GetTextureObject()->GetDesc( &desc ); + if ( desc.Format >= 1 && desc.Format < TEXTURE_ATLAS_MAX ) + uniqueDiffuse.push_back( { tex, desc.Format, engineTex->GetTextureObject() } ); + } + + // Normal map + D3D11Texture* normalTex = surface->GetNormalmap(); + if ( normalTex && seenNormal.insert( normalTex ).second ) { + D3D11_TEXTURE2D_DESC desc; + normalTex->GetTextureObject()->GetDesc( &desc ); + if ( desc.Format >= 1 && desc.Format < TEXTURE_ATLAS_MAX ) + uniqueNormals.push_back( { normalTex, desc.Format, normalTex->GetTextureObject() } ); + } + + // FX map + D3D11Texture* fxTex = surface->GetFxMap(); + if ( fxTex && seenFx.insert( fxTex ).second ) { + D3D11_TEXTURE2D_DESC desc; + fxTex->GetTextureObject()->GetDesc( &desc ); + if ( desc.Format >= 1 && desc.Format < TEXTURE_ATLAS_MAX ) + uniqueFx.push_back( { fxTex, desc.Format, fxTex->GetTextureObject() } ); + } + } + } + } + + auto* device = m_Engine->GetDevice().Get(); + auto* context = m_Engine->GetContext().Get(); + + // Build per-format Texture2DArray atlases for diffuse textures + { + std::sort( uniqueDiffuse.begin(), uniqueDiffuse.end(), + []( const DiffuseTextureInfo& a, const DiffuseTextureInfo& b ) { return a.Format < b.Format; } ); + + size_t rangeStart = 0; + while ( rangeStart < uniqueDiffuse.size() ) { + DXGI_FORMAT fmt = uniqueDiffuse[rangeStart].Format; + size_t rangeEnd = rangeStart; + while ( rangeEnd < uniqueDiffuse.size() && uniqueDiffuse[rangeEnd].Format == fmt ) + rangeEnd++; + + std::vector texPtrs; + 
texPtrs.reserve( rangeEnd - rangeStart ); + for ( size_t i = rangeStart; i < rangeEnd; i++ ) + texPtrs.push_back( uniqueDiffuse[i].Texture2D.Get() ); + + std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); + TextureManager::AtlasResult atlas = TextureManager::CreateAtlasArray( device, context, txView, 2048, 6 ); + + for ( size_t i = 0; i < texPtrs.size(); i++ ) + m_WorldMeshDiffuseAtlasLookup[uniqueDiffuse[rangeStart + i].gothicTexture] = { fmt, atlas.descriptors[i] }; + + m_WorldMeshDiffuseAtlasses[fmt] = atlas; + rangeStart = rangeEnd; + } + } + + // Helper: build aux (normal/fx) atlases + auto buildAuxAtlases = [&]( std::vector& textures, + std::unordered_map& lookup, + std::array& atlasses ) { + std::sort( textures.begin(), textures.end(), + []( const AuxTextureInfo& a, const AuxTextureInfo& b ) { return a.Format < b.Format; } ); + + size_t rangeStart = 0; + while ( rangeStart < textures.size() ) { + DXGI_FORMAT fmt = textures[rangeStart].Format; + size_t rangeEnd = rangeStart; + while ( rangeEnd < textures.size() && textures[rangeEnd].Format == fmt ) + rangeEnd++; + + std::vector texPtrs; + texPtrs.reserve( rangeEnd - rangeStart ); + for ( size_t i = rangeStart; i < rangeEnd; i++ ) + texPtrs.push_back( textures[i].Texture2D.Get() ); + + std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); + TextureManager::AtlasResult atlas = TextureManager::CreateAtlasArray( device, context, txView, 2048, 6 ); + + for ( size_t i = 0; i < texPtrs.size(); i++ ) + lookup[textures[rangeStart + i].engineTexture] = { fmt, atlas.descriptors[i] }; + + atlasses[fmt] = atlas; + rangeStart = rangeEnd; + } + }; + + buildAuxAtlases( uniqueNormals, m_WorldMeshNormalAtlasLookup, m_WorldMeshNormalAtlasses ); + buildAuxAtlases( uniqueFx, m_WorldMeshFxAtlasLookup, m_WorldMeshFxAtlasses ); + + LogInfo() << "World Mesh Atlas: " << uniqueDiffuse.size() << " diffuse, " + << uniqueNormals.size() << " normal, " << uniqueFx.size() << " fx textures"; +} + +// 
============================================================ +// BuildGeometryBuffers +// ============================================================ +void D3D11MeshAtlasPass::BuildGeometryBuffers() { + std::vector allVertices; + std::vector allIndices; + std::vector submeshGPU; + + std::map groupsByFormat; + std::unordered_set processedMeshes; + + // Pre-count + { + size_t totalVertices = 0, totalIndices = 0, totalSubmeshes = 0; + auto& ws = Engine::GAPI->GetWorldSections(); + for ( auto& [x, row] : ws ) { + for ( auto& [y, section] : row ) { + for ( auto const& [meshKey, worldMeshInfo] : section.WorldMeshes ) { + if ( !meshKey.Material ) continue; + zCTexture* tex = meshKey.Material->GetTextureSingle(); + if ( m_WorldMeshDiffuseAtlasLookup.find( tex ) != m_WorldMeshDiffuseAtlasLookup.end() ) { + totalVertices += worldMeshInfo->Vertices.size(); + totalIndices += worldMeshInfo->Indices.size(); + totalSubmeshes++; + } + } + } + } + allVertices.reserve( totalVertices ); + allIndices.reserve( totalIndices ); + submeshGPU.reserve( totalSubmeshes ); + } + + auto& worldSections = Engine::GAPI->GetWorldSections(); + for ( auto& [x, row] : worldSections ) { + for ( auto& [y, section] : row ) { + for ( auto const& [meshKey, worldMeshInfo] : section.WorldMeshes ) { + if ( !meshKey.Material ) continue; + + zCTexture* tex = meshKey.Material->GetTextureSingle(); + auto diffIt = m_WorldMeshDiffuseAtlasLookup.find( tex ); + if ( diffIt == m_WorldMeshDiffuseAtlasLookup.end() ) + continue; + + MeshInfo* mi = worldMeshInfo; + if ( !processedMeshes.insert( mi ).second ) + continue; + + m_WorldMeshAtlasedSubmeshes.insert( mi ); + + const TextureAtlasLookup& diffLookup = diffIt->second; + auto& group = groupsByFormat[diffLookup.atlasFormat]; + group.format = diffLookup.atlasFormat; + + UINT baseVertex = static_cast(allVertices.size()); + UINT startIndex = static_cast(allIndices.size()); + + allVertices.insert( allVertices.end(), mi->Vertices.begin(), mi->Vertices.end() ); + 
allIndices.insert( allIndices.end(), mi->Indices.begin(), mi->Indices.end() ); + + WorldMeshSubmeshGPUData gpuData = {}; + gpuData.diffuseSlice = diffLookup.descriptor.slice; + gpuData.dUStart = diffLookup.descriptor.uStart; + gpuData.dVStart = diffLookup.descriptor.vStart; + gpuData.dUEnd = diffLookup.descriptor.uEnd; + gpuData.dVEnd = diffLookup.descriptor.vEnd; + + UINT flags = 0; + auto surface = tex->GetSurface(); + if ( surface ) { + D3D11Texture* normalTex = surface->GetNormalmap(); + if ( normalTex ) { + auto normIt = m_WorldMeshNormalAtlasLookup.find( normalTex ); + if ( normIt != m_WorldMeshNormalAtlasLookup.end() ) { + gpuData.normalSlice = normIt->second.descriptor.slice; + gpuData.nUStart = normIt->second.descriptor.uStart; + gpuData.nVStart = normIt->second.descriptor.vStart; + gpuData.nUEnd = normIt->second.descriptor.uEnd; + gpuData.nVEnd = normIt->second.descriptor.vEnd; + flags |= 1; // HAS_NORMAL + } + } + + D3D11Texture* fxTex = surface->GetFxMap(); + if ( fxTex ) { + auto fxIt = m_WorldMeshFxAtlasLookup.find( fxTex ); + if ( fxIt != m_WorldMeshFxAtlasLookup.end() ) { + gpuData.fxSlice = fxIt->second.descriptor.slice; + gpuData.fUStart = fxIt->second.descriptor.uStart; + gpuData.fVStart = fxIt->second.descriptor.vStart; + gpuData.fUEnd = fxIt->second.descriptor.uEnd; + gpuData.fVEnd = fxIt->second.descriptor.vEnd; + flags |= 2; // HAS_FX + } + } + } + + int alphaFunc = meshKey.Material->GetAlphaFunc(); + if ( alphaFunc == zMAT_ALPHA_FUNC_TEST || tex->HasAlphaChannel() ) + flags |= 4; // ALPHA_TEST + + gpuData.flags = flags; + + UINT submeshIndex = static_cast(submeshGPU.size()); + submeshGPU.push_back( gpuData ); + + D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS args = {}; + args.IndexCountPerInstance = static_cast(mi->Indices.size()); + args.InstanceCount = 1; + args.StartIndexLocation = startIndex; + args.BaseVertexLocation = static_cast(baseVertex); + args.StartInstanceLocation = submeshIndex; + group.indirectArgs.push_back( args ); + } + } + } 
+ + if ( allVertices.empty() ) { + LogWarn() << "D3D11MeshAtlasPass::BuildGeometryBuffers: No world mesh vertices for atlas"; + return; + } + + m_WorldMeshGlobalVertexBuffer = std::make_unique(); + m_WorldMeshGlobalVertexBuffer->Init( + allVertices.data(), + static_cast(allVertices.size() * sizeof( ExVertexStruct )), + D3D11VertexBuffer::B_VERTEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + m_WorldMeshGlobalIndexBuffer = std::make_unique(); + m_WorldMeshGlobalIndexBuffer->Init( + allIndices.data(), + static_cast(allIndices.size() * sizeof( VERTEX_INDEX )), + D3D11VertexBuffer::B_INDEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + UINT maxIds = static_cast(submeshGPU.size()); + if ( maxIds < 256 ) maxIds = 256; + std::vector instanceIds( maxIds ); + for ( uint32_t i = 0; i < maxIds; i++ ) + instanceIds[i] = i; + + m_WorldMeshGlobalInstanceIdBuffer = std::make_unique(); + m_WorldMeshGlobalInstanceIdBuffer->Init( + instanceIds.data(), + static_cast(instanceIds.size() * sizeof( uint32_t )), + D3D11VertexBuffer::B_VERTEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + auto* device = m_Engine->GetDevice().Get(); + auto* context = m_Engine->GetContext().Get(); + + m_WorldMeshSubmeshBuffer = std::make_unique>(); + m_WorldMeshSubmeshBuffer->Init( device, static_cast(submeshGPU.size()), false, false ); + m_WorldMeshSubmeshBuffer->UpdateBufferDefault( context, submeshGPU.data(), static_cast(submeshGPU.size()) ); + + m_WorldMeshAtlasDrawGroups.clear(); + for ( auto& [fmt, group] : groupsByFormat ) { + if ( group.indirectArgs.empty() ) + continue; + + UINT bufSize = static_cast(group.indirectArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); + group.indirectBuffer = std::make_unique(); + group.indirectBuffer->Init( + group.indirectArgs.data(), bufSize, + D3D11IndirectBuffer::B_VERTEXBUFFER, + D3D11IndirectBuffer::U_IMMUTABLE, + D3D11IndirectBuffer::CA_NONE ); + + 
m_WorldMeshAtlasDrawGroups.push_back( std::move( group ) ); + } + + LogInfo() << "World Mesh Atlas geometry: " << allVertices.size() << " vertices, " + << allIndices.size() << " indices, " + << m_WorldMeshAtlasDrawGroups.size() << " format groups, " + << submeshGPU.size() << " submeshes"; +} + +// ============================================================ +// Draw – per-frame indirect draw of atlased world mesh +// ============================================================ +XRESULT D3D11MeshAtlasPass::Draw() { + if ( m_WorldMeshAtlasDrawGroups.empty() || + !m_WorldMeshGlobalVertexBuffer || !m_WorldMeshGlobalIndexBuffer ) + return XR_SUCCESS; + + auto _ = m_Engine->RecordGraphicsEvent( L"DrawWorldMesh_Atlas" ); + auto& context = m_Engine->GetContext(); + + m_Engine->SetDefaultStates(); + + XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); + Engine::GAPI->SetViewTransformXM( view ); + Engine::GAPI->ResetWorldTransform(); + + context->DSSetShader( nullptr, nullptr, 0 ); + context->HSSetShader( nullptr, nullptr, 0 ); + + // --- Bind global geometry --- + UINT strides[2] = { sizeof( ExVertexStruct ), sizeof( uint32_t ) }; + UINT offsets[2] = { 0, 0 }; + ID3D11Buffer* vbs[2] = { + m_WorldMeshGlobalVertexBuffer->GetVertexBuffer().Get(), + m_WorldMeshGlobalInstanceIdBuffer->GetVertexBuffer().Get() + }; + context->IASetVertexBuffers( 0, 2, vbs, strides, offsets ); + context->IASetIndexBuffer( m_WorldMeshGlobalIndexBuffer->GetVertexBuffer().Get(), + VERTEX_INDEX_DXGI_FORMAT, 0 ); + context->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); + + // Submesh structured buffer -> VS t1 + ID3D11ShaderResourceView* submeshSRV = m_WorldMeshSubmeshBuffer->GetSRV(); + context->VSSetShaderResources( 1, 1, &submeshSRV ); + + // Vertex shader + m_Engine->SetActiveVertexShader( VShaderID::VS_ExWorldAtlas ); + m_Engine->SetupVS_ExMeshDrawCall(); + m_Engine->SetupVS_ExConstantBuffer(); + m_Engine->ActiveVS->Apply(); + + // Pixel shader + m_Engine->SetActivePixelShader( 
PShaderID::PS_WorldAtlas ); + + m_Engine->ActivePS->GetBuffer(0).Update(&Engine::GAPI->GetRendererState().GraphicsState ).Bind(); + + GSky* sky = Engine::GAPI->GetSky(); + m_Engine->ActivePS->GetBuffer(1).Update( &sky->GetAtmosphereCB() ).Bind(); + + MaterialInfo defMaterial{}; + m_Engine->ActivePS->GetBuffer(2).Update( &defMaterial.buffer ).Bind(); + + m_Engine->InfiniteRangeConstantBuffer->BindToPixelShader( 3 ); + + context->PSSetShaderResources( 4, 1, m_Engine->ReflectionCube.GetAddressOf() ); + + m_Engine->ActivePS->Apply(); + + // --- Draw per format group --- + for ( auto& group : m_WorldMeshAtlasDrawGroups ) { + ID3D11ShaderResourceView* diffuseSRV = m_WorldMeshDiffuseAtlasses[group.format].atlasSRV; + if ( !diffuseSRV ) continue; + + // Bind first available normal/fx atlases (format grouping is per-diffuse) + ID3D11ShaderResourceView* normalSRV = nullptr; + ID3D11ShaderResourceView* fxSRV = nullptr; + for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ ) { + if ( !normalSRV && m_WorldMeshNormalAtlasses[i].atlasSRV ) + normalSRV = m_WorldMeshNormalAtlasses[i].atlasSRV; + if ( !fxSRV && m_WorldMeshFxAtlasses[i].atlasSRV ) + fxSRV = m_WorldMeshFxAtlasses[i].atlasSRV; + } + + ID3D11ShaderResourceView* psSRVs[3] = { diffuseSRV, normalSRV, fxSRV }; + context->PSSetShaderResources( 0, 3, psSRVs ); + + DrawMultiIndexedInstancedIndirect( + context.Get(), + static_cast(group.indirectArgs.size()), + group.indirectBuffer->GetIndirectBuffer().Get(), + 0, + sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ) ); + } + + // Unbind submesh buffer from VS + ID3D11ShaderResourceView* nullSRV = nullptr; + context->VSSetShaderResources( 1, 1, &nullSRV ); + + return XR_SUCCESS; +} diff --git a/D3D11Engine/D3D11MeshAtlasPass.h b/D3D11Engine/D3D11MeshAtlasPass.h new file mode 100644 index 00000000..e8c4a2f0 --- /dev/null +++ b/D3D11Engine/D3D11MeshAtlasPass.h @@ -0,0 +1,83 @@ +#pragma once +#include "D3D11AtlasTypes.h" +#include "D3D11StructuredBuffer.h" +#include 
"D3D11VertexBuffer.h" +#include "D3D11ConstantBuffer.h" +#include "ConstantBufferStructs.h" + +#include +#include +#include +#include +#include +#include + +class D3D11GraphicsEngine; +class D3D11Texture; +class zCTexture; +struct MeshInfo; + +/** + * Encapsulates all texture-atlas-based GPU-driven rendering for world mesh geometry. + * + * Responsibilities: + * - Building per-format Texture2DArray atlases for world-mesh diffuse, normal, and FX textures + * - Building the merged global world-mesh VB/IB with per-submesh indirect-args buffers + * - Executing the multi-indirect draw of atlased world mesh geometry each frame + * + * The engine keeps one instance of this class. Call Build() when a new world + * is loaded (OnWorldLoaded), and Draw() every frame instead of the old + * DrawWorldMesh_Atlas(). + */ +class D3D11MeshAtlasPass { + friend class D3D11GraphicsEngine; +public: + explicit D3D11MeshAtlasPass( D3D11GraphicsEngine* engine ); + + /** (Re-)build atlases and geometry buffers. + * Called from D3D11GraphicsEngine::OnWorldLoaded(). */ + void Build(); + + /** Draw atlased world mesh geometry via multi-indirect. */ + XRESULT Draw(); + + /** True once Build() has completed and at least one draw group exists. */ + bool IsReady() const { return !m_WorldMeshAtlasDrawGroups.empty(); } + + /** Returns true if the given MeshInfo was atlased (used to skip it in the legacy path). */ + bool IsSubmeshAtlased( MeshInfo* mi ) const { + return m_WorldMeshAtlasedSubmeshes.count( mi ) != 0; + } + + /** Diffuse atlas lookup (read-only access for shadow passes). 
*/ + const std::unordered_map& GetDiffuseAtlasLookup() const { + return m_WorldMeshDiffuseAtlasLookup; + } + +private: + D3D11GraphicsEngine* m_Engine; + + // ---- Atlas textures (one array per texture type) ---- + std::unordered_map m_WorldMeshDiffuseAtlasLookup; + std::unordered_map m_WorldMeshNormalAtlasLookup; + std::unordered_map m_WorldMeshFxAtlasLookup; + + std::array m_WorldMeshDiffuseAtlasses{}; + std::array m_WorldMeshNormalAtlasses{}; + std::array m_WorldMeshFxAtlasses{}; + + // ---- Global geometry ---- + std::unique_ptr m_WorldMeshGlobalVertexBuffer; + std::unique_ptr m_WorldMeshGlobalIndexBuffer; + std::unique_ptr m_WorldMeshGlobalInstanceIdBuffer; + + // ---- GPU submesh descriptors ---- + std::unique_ptr> m_WorldMeshSubmeshBuffer; + + // ---- Draw groups ---- + std::vector m_WorldMeshAtlasDrawGroups; + std::unordered_set m_WorldMeshAtlasedSubmeshes; + + void BuildTextureAtlasses(); + void BuildGeometryBuffers(); +}; diff --git a/D3D11Engine/D3D11NVHBAO.cpp b/D3D11Engine/D3D11NVHBAO.cpp index a10f168b..f9871ccb 100644 --- a/D3D11Engine/D3D11NVHBAO.cpp +++ b/D3D11Engine/D3D11NVHBAO.cpp @@ -58,7 +58,7 @@ XRESULT D3D11NVHBAO::Render( Input.DepthData.ProjectionMatrix.Layout = GFSDK_SSAO_COLUMN_MAJOR_ORDER; Input.DepthData.MetersToViewSpaceUnits = settings.MetersToViewSpaceUnits; - Input.NormalData.Enable = true; + Input.NormalData.Enable = false; Input.NormalData.pFullResNormalTextureSRV = pFullResNormalTexSRV.Get(); auto identity = XMMatrixIdentity(); Input.NormalData.WorldToViewMatrix.Data = GFSDK_SSAO_Float4x4( reinterpret_cast(&identity) ); // We already have them in view-space diff --git a/D3D11Engine/D3D11PFX_DistanceBlur.cpp b/D3D11Engine/D3D11PFX_DistanceBlur.cpp index 8cd5beb5..505477e8 100644 --- a/D3D11Engine/D3D11PFX_DistanceBlur.cpp +++ b/D3D11Engine/D3D11PFX_DistanceBlur.cpp @@ -28,7 +28,6 @@ XRESULT D3D11PFX_DistanceBlur::Render( ID3D11ShaderResourceView* diffuse ) { auto ps = engine->GetShaderManager().GetPShader( 
PShaderID::PS_PFX_DistanceBlur ); Engine::GAPI->GetRendererState().BlendState.SetDefault(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); // Copy scene auto tempBuffer = FxRenderer->GetTempBuffer(); diff --git a/D3D11Engine/D3D11PFX_GodRays.cpp b/D3D11Engine/D3D11PFX_GodRays.cpp index b75456ca..96c02f70 100644 --- a/D3D11Engine/D3D11PFX_GodRays.cpp +++ b/D3D11Engine/D3D11PFX_GodRays.cpp @@ -19,7 +19,7 @@ D3D11PFX_GodRays::~D3D11PFX_GodRays() {} /** Draws this effect to the given buffer */ XRESULT D3D11PFX_GodRays::Render( ID3D11ShaderResourceView* backbuffer, - ID3D11ShaderResourceView* normals ) { + ID3D11ShaderResourceView* depth ) { if ( Engine::GAPI->GetSky()->GetAtmoshpereSettings().LightDirection.y <= 0 ) return XR_SUCCESS; // Don't render the godrays in the night-time @@ -82,7 +82,7 @@ XRESULT D3D11PFX_GodRays::Render( ID3D11ShaderResourceView* srvs[2] { backbuffer, - normals, + depth, }; engine->GetContext()->PSSetShaderResources( 0, 2, srvs ); @@ -102,7 +102,6 @@ XRESULT D3D11PFX_GodRays::Render( // Upscale and blend Engine::GAPI->GetRendererState().BlendState.SetAdditiveBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); FxRenderer->CopyTextureToRTV( tempBuffer2->GetShaderResView(), oldRTV, engine->GetResolution() ); diff --git a/D3D11Engine/D3D11PFX_GodRays.h b/D3D11Engine/D3D11PFX_GodRays.h index f5f97fe1..20b51ed1 100644 --- a/D3D11Engine/D3D11PFX_GodRays.h +++ b/D3D11Engine/D3D11PFX_GodRays.h @@ -9,6 +9,6 @@ class D3D11PFX_GodRays : /** Draws this effect to the given buffer */ XRESULT Render( RenderToTextureBuffer* fxbuffer ) override { return XR_FAILED; } - XRESULT Render( ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* normals ); + XRESULT Render( ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* depth ); }; diff --git a/D3D11Engine/D3D11PFX_HDR.cpp b/D3D11Engine/D3D11PFX_HDR.cpp index 4fe911c1..d0ae7b7f 100644 --- a/D3D11Engine/D3D11PFX_HDR.cpp +++ b/D3D11Engine/D3D11PFX_HDR.cpp @@ -41,7 
+41,6 @@ XRESULT D3D11PFX_HDR::Render( ID3D11RenderTargetView* output, ID3D11ShaderResour D3D11GraphicsEngine* engine = reinterpret_cast(Engine::GraphicsEngine); engine->SetDefaultStates(); Engine::GAPI->GetRendererState().BlendState.BlendEnabled = false; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); engine->UpdateRenderStates(); // Save old rendertargets diff --git a/D3D11Engine/D3D11PFX_HeightFog.cpp b/D3D11Engine/D3D11PFX_HeightFog.cpp index db270e57..7614ec44 100644 --- a/D3D11Engine/D3D11PFX_HeightFog.cpp +++ b/D3D11Engine/D3D11PFX_HeightFog.cpp @@ -120,11 +120,9 @@ XRESULT D3D11PFX_HeightFog::Render( RenderToTextureBuffer* fxbuffer ) { engine->SetDefaultStates(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().BlendState.SetDefault(); //Engine::GAPI->GetRendererState().BlendState.SetAdditiveBlending(); Engine::GAPI->GetRendererState().BlendState.BlendEnabled = true; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); // Copy FxRenderer->DrawFullScreenQuad(); diff --git a/D3D11Engine/D3D11PFX_SMAA.cpp b/D3D11Engine/D3D11PFX_SMAA.cpp index 0b2217b7..14ed5525 100644 --- a/D3D11Engine/D3D11PFX_SMAA.cpp +++ b/D3D11Engine/D3D11PFX_SMAA.cpp @@ -30,7 +30,18 @@ void D3D11PFX_SMAA::RenderPostFX( const Microsoft::WRL::ComPtr(Engine::GraphicsEngine); ID3D11DeviceContext* pContext = engine->GetContext().Get(); - engine->SetDefaultStates(); + // Configure states that SMAA needs through the Gothic state system + auto& state = Engine::GAPI->GetRendererState(); + state.RasterizerState.SetDefault(); + state.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; + state.RasterizerState.DepthClipEnable = true; + + state.DepthState.DepthBufferEnabled = false; + state.DepthState.DepthWriteEnabled = false; + state.DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::CF_COMPARISON_ALWAYS; + + 
state.BlendState.SetDefault(); + engine->UpdateRenderStates(); Microsoft::WRL::ComPtr OldRTV; diff --git a/D3D11Engine/D3D11PfxRenderer.cpp b/D3D11Engine/D3D11PfxRenderer.cpp index 7a60f8d2..848763be 100644 --- a/D3D11Engine/D3D11PfxRenderer.cpp +++ b/D3D11Engine/D3D11PfxRenderer.cpp @@ -68,8 +68,8 @@ XRESULT D3D11PfxRenderer::RenderHeightfog() { } /** Renders the godrays-Effect */ -XRESULT D3D11PfxRenderer::RenderGodRays(ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* normals) { - return FX_GodRays->Render( backbuffer , normals ); +XRESULT D3D11PfxRenderer::RenderGodRays(ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* depth) { + return FX_GodRays->Render( backbuffer , depth ); } /** Renders the depth-of-field effect */ diff --git a/D3D11Engine/D3D11PfxRenderer.h b/D3D11Engine/D3D11PfxRenderer.h index e1fafcb1..10f38389 100644 --- a/D3D11Engine/D3D11PfxRenderer.h +++ b/D3D11Engine/D3D11PfxRenderer.h @@ -47,7 +47,7 @@ class D3D11PfxRenderer { XRESULT RenderSimpleSharpen( const Microsoft::WRL::ComPtr& input, INT2 inputSize, const Microsoft::WRL::ComPtr& output, INT2 outputSize, RenderToTextureBuffer& intermediateBuffer ); /** Renders the godrays-Effect */ - XRESULT RenderGodRays(ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* normals); + XRESULT RenderGodRays(ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* depth); /** Renders the depth-of-field effect */ XRESULT RenderDepthOfField(ID3D11ShaderResourceView* backbuffer); diff --git a/D3D11Engine/D3D11PipelineStateObject.cpp b/D3D11Engine/D3D11PipelineStateObject.cpp new file mode 100644 index 00000000..cd157a26 --- /dev/null +++ b/D3D11Engine/D3D11PipelineStateObject.cpp @@ -0,0 +1,291 @@ +#include "pch.h" +#include "D3D11PipelineStateObject.h" +#include "GothicGraphicsState.h" +#include "D3D11VShader.h" +#include "D3D11PShader.h" +#include "D3D11GShader.h" +#include "D3D11HDShader.h" +#include "Toolbox.h" + +// 
--------------------------------------------------------------------------- +// D3D11PipelineStateObject::Desc +// --------------------------------------------------------------------------- + +D3D11PipelineStateObject::Desc::Desc() { + BlendState.SetDefault(); + RasterizerState.SetDefault(); + DepthStencilState.SetDefault(); + std::fill( std::begin( RTVFormats ), std::end( RTVFormats ), DXGI_FORMAT_UNKNOWN ); +} + +// --------------------------------------------------------------------------- +// D3D11PipelineStateObject +// --------------------------------------------------------------------------- + +D3D11PipelineStateObject::D3D11PipelineStateObject( const Desc& desc ) + : m_VS( desc.VS ) + , m_PS( desc.PS ) + , m_GS( desc.GS ) + , m_HDS( desc.HDS ) + , m_BlendState( desc.BlendState ) + , m_SampleMask( desc.SampleMask ) + , m_RasterizerState( desc.RasterizerState ) + , m_DepthStencilState( desc.DepthStencilState ) + , m_TopologyType( desc.TopologyType ) + , m_NumRenderTargets( desc.NumRenderTargets ) + , m_DSVFormat( desc.DSVFormat ) + , m_SampleDesc( desc.SampleDesc ) +{ + memcpy( m_RTVFormats, desc.RTVFormats, sizeof( m_RTVFormats ) ); + + // Ensure the Gothic state hashes are up to date + m_BlendState.ComputeHash(); + m_RasterizerState.ComputeHash(); + m_DepthStencilState.ComputeHash(); + + ComputeHash(); +} + +static void HashPointer( std::size_t& seed, const void* ptr ) { + auto v = reinterpret_cast( ptr ); + Toolbox::hash_combine( seed, static_cast( v ) ); + if constexpr ( sizeof( uintptr_t ) > sizeof( DWORD ) ) { + Toolbox::hash_combine( seed, static_cast( v >> 32 ) ); + } +} + +void D3D11PipelineStateObject::ComputeHash() { + m_Hash = 0; + + // Shader identity: use raw pointer value as a unique id + HashPointer( m_Hash, m_VS.get() ); + HashPointer( m_Hash, m_PS.get() ); + HashPointer( m_Hash, m_GS.get() ); + HashPointer( m_Hash, m_HDS.get() ); + + // Fixed-function state hashes + m_BlendState.ComputeHash(); + Toolbox::hash_combine( m_Hash, static_cast( 
m_BlendState.Hash ) ); + m_RasterizerState.ComputeHash(); + Toolbox::hash_combine( m_Hash, static_cast( m_RasterizerState.Hash ) ); + m_DepthStencilState.ComputeHash(); + Toolbox::hash_combine( m_Hash, static_cast( m_DepthStencilState.Hash ) ); + + // Sample mask + Toolbox::hash_combine( m_Hash, static_cast(m_SampleMask) ); + + // Topology + Toolbox::hash_combine( m_Hash, static_cast( m_TopologyType ) ); + + // Render target formats + Toolbox::hash_combine( m_Hash, static_cast( m_NumRenderTargets ) ); + for ( UINT i = 0; i < 8; ++i ) { + Toolbox::hash_combine( m_Hash, static_cast( m_RTVFormats[i] ) ); + } + Toolbox::hash_combine( m_Hash, static_cast( m_DSVFormat ) ); + + // Sample desc + Toolbox::hash_combine( m_Hash, static_cast( m_SampleDesc.Count ) ); + Toolbox::hash_combine( m_Hash, static_cast( m_SampleDesc.Quality ) ); +} + +// --------------------------------------------------------------------------- +// D3D11PipelineStateCache +// --------------------------------------------------------------------------- + +void D3D11PipelineStateCache::Init( ID3D11Device1* device, ID3D11DeviceContext1* context ) { + m_Device = device; + m_Context = context; +} + +void D3D11PipelineStateCache::SetPipelineState( const D3D11PipelineStateObject& pso ) { + // Fast-out: if the same PSO is already fully bound, nothing to do + if ( pso.GetHash() == m_BoundState.PSOHash ) + return; + + // --- Vertex Shader ------------------------------------------------------- + const size_t vsHash = reinterpret_cast( pso.GetVS().get() ); + if ( vsHash != m_BoundState.VSHash ) { + if ( pso.GetVS() ) { + pso.GetVS()->Apply(); + } else { + m_Context->VSSetShader( nullptr, nullptr, 0 ); + } + m_BoundState.VSHash = vsHash; + } + + // --- Pixel Shader -------------------------------------------------------- + const size_t psHash = reinterpret_cast( pso.GetPS().get() ); + if ( psHash != m_BoundState.PSHash ) { + if ( pso.GetPS() ) { + pso.GetPS()->Apply(); + } else { + m_Context->PSSetShader( nullptr, 
nullptr, 0 ); + } + m_BoundState.PSHash = psHash; + } + + // --- Geometry Shader ----------------------------------------------------- + const size_t gsHash = reinterpret_cast( pso.GetGS().get() ); + if ( gsHash != m_BoundState.GSHash ) { + if ( pso.GetGS() ) { + pso.GetGS()->Apply(); + } else { + m_Context->GSSetShader( nullptr, nullptr, 0 ); + } + m_BoundState.GSHash = gsHash; + } + + // --- Hull / Domain Shader ------------------------------------------------ + const size_t hdsHash = reinterpret_cast( pso.GetHDS().get() ); + if ( hdsHash != m_BoundState.HDSHash ) { + if ( pso.GetHDS() ) { + pso.GetHDS()->Apply(); + } else { + m_Context->HSSetShader( nullptr, nullptr, 0 ); + m_Context->DSSetShader( nullptr, nullptr, 0 ); + } + m_BoundState.HDSHash = hdsHash; + } + + // --- Blend State --------------------------------------------------------- + const size_t blendHash = pso.GetBlendState().Hash; + if ( blendHash != m_BoundState.BlendHash ) { + auto blendState = GetOrCreateBlendState( pso.GetBlendState() ); + const float blendFactor[4] = { 0, 0, 0, 0 }; + m_Context->OMSetBlendState( blendState.Get(), blendFactor, pso.GetSampleMask() ); + m_BoundState.BlendHash = blendHash; + m_BoundState.SampleMask = pso.GetSampleMask(); + } else if ( pso.GetSampleMask() != m_BoundState.SampleMask ) { + // Same blend state but different sample mask — need to rebind + auto it = m_BlendStates.find( blendHash ); + if ( it != m_BlendStates.end() ) { + const float blendFactor[4] = { 0, 0, 0, 0 }; + m_Context->OMSetBlendState( it->second.Get(), blendFactor, pso.GetSampleMask() ); + } + m_BoundState.SampleMask = pso.GetSampleMask(); + } + + // --- Rasterizer State ---------------------------------------------------- + const size_t rastHash = pso.GetRasterizerState().Hash; + if ( rastHash != m_BoundState.RasterizerHash ) { + auto rastState = GetOrCreateRasterizerState( pso.GetRasterizerState() ); + m_Context->RSSetState( rastState.Get() ); + m_BoundState.RasterizerHash = rastHash; + } + + 
// --- Depth-Stencil State ------------------------------------------------- + const size_t dsHash = pso.GetDepthStencilState().Hash; + if ( dsHash != m_BoundState.DepthStencilHash ) { + auto dsState = GetOrCreateDepthStencilState( pso.GetDepthStencilState() ); + m_Context->OMSetDepthStencilState( dsState.Get(), 0 ); + m_BoundState.DepthStencilHash = dsHash; + } + + // --- Primitive Topology -------------------------------------------------- + const D3D11_PRIMITIVE_TOPOLOGY topology = pso.GetD3D11Topology(); + if ( topology != m_BoundState.Topology ) { + m_Context->IASetPrimitiveTopology( topology ); + m_BoundState.Topology = topology; + } + + // Mark whole PSO as bound + m_BoundState.PSOHash = pso.GetHash(); +} + +void D3D11PipelineStateCache::Invalidate() { + m_BoundState = BoundState{}; +} + +void D3D11PipelineStateCache::Clear() { + Invalidate(); + m_BlendStates.clear(); + m_RasterizerStates.clear(); + m_DepthStencilStates.clear(); +} + +// --------------------------------------------------------------------------- +// State object creation helpers +// --------------------------------------------------------------------------- + +Microsoft::WRL::ComPtr +D3D11PipelineStateCache::GetOrCreateBlendState( const GothicBlendStateInfo& desc ) { + auto it = m_BlendStates.find( desc.Hash ); + if ( it != m_BlendStates.end() ) + return it->second; + + D3D11_BLEND_DESC bd = {}; + bd.AlphaToCoverageEnable = desc.AlphaToCoverage; + bd.IndependentBlendEnable = FALSE; + + bd.RenderTarget[0].BlendEnable = desc.BlendEnabled; + bd.RenderTarget[0].SrcBlend = static_cast( desc.SrcBlend ); + bd.RenderTarget[0].DestBlend = static_cast( desc.DestBlend ); + bd.RenderTarget[0].BlendOp = static_cast( desc.BlendOp ); + bd.RenderTarget[0].SrcBlendAlpha = static_cast( desc.SrcBlendAlpha ); + bd.RenderTarget[0].DestBlendAlpha = static_cast( desc.DestBlendAlpha ); + bd.RenderTarget[0].BlendOpAlpha = static_cast( desc.BlendOpAlpha ); + bd.RenderTarget[0].RenderTargetWriteMask = 
desc.ColorWritesEnabled + ? ( D3D11_COLOR_WRITE_ENABLE_RED | D3D11_COLOR_WRITE_ENABLE_GREEN | + D3D11_COLOR_WRITE_ENABLE_BLUE | D3D11_COLOR_WRITE_ENABLE_ALPHA ) + : 0; + + Microsoft::WRL::ComPtr state; + m_Device->CreateBlendState( &bd, state.GetAddressOf() ); + m_BlendStates[desc.Hash] = state; + return state; +} + +Microsoft::WRL::ComPtr +D3D11PipelineStateCache::GetOrCreateRasterizerState( const GothicRasterizerStateInfo& desc ) { + auto it = m_RasterizerStates.find( desc.Hash ); + if ( it != m_RasterizerStates.end() ) + return it->second; + + D3D11_RASTERIZER_DESC rd = {}; + rd.CullMode = static_cast( desc.CullMode ); + rd.FillMode = desc.Wireframe ? D3D11_FILL_WIREFRAME : D3D11_FILL_SOLID; + rd.FrontCounterClockwise = desc.FrontCounterClockwise; + rd.DepthBias = desc.ZBias; + rd.DepthBiasClamp = 0; + rd.SlopeScaledDepthBias = 0; + rd.DepthClipEnable = desc.DepthClipEnable; + rd.ScissorEnable = false; + rd.MultisampleEnable = false; + rd.AntialiasedLineEnable = true; + + Microsoft::WRL::ComPtr state; + m_Device->CreateRasterizerState( &rd, state.GetAddressOf() ); + m_RasterizerStates[desc.Hash] = state; + return state; +} + +Microsoft::WRL::ComPtr +D3D11PipelineStateCache::GetOrCreateDepthStencilState( const GothicDepthBufferStateInfo& desc ) { + auto it = m_DepthStencilStates.find( desc.Hash ); + if ( it != m_DepthStencilStates.end() ) + return it->second; + + D3D11_DEPTH_STENCIL_DESC dd = {}; + dd.DepthEnable = desc.DepthBufferEnabled; + dd.DepthWriteMask = desc.DepthWriteEnabled ? 
D3D11_DEPTH_WRITE_MASK_ALL + : D3D11_DEPTH_WRITE_MASK_ZERO; + dd.DepthFunc = static_cast( desc.DepthBufferCompareFunc ); + + dd.StencilEnable = false; + dd.StencilReadMask = 0xFF; + dd.StencilWriteMask = 0xFF; + dd.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + dd.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_INCR; + dd.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + dd.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + dd.BackFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + dd.BackFace.StencilDepthFailOp = D3D11_STENCIL_OP_DECR; + dd.BackFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + dd.BackFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + + Microsoft::WRL::ComPtr state; + m_Device->CreateDepthStencilState( &dd, state.GetAddressOf() ); + m_DepthStencilStates[desc.Hash] = state; + return state; +} diff --git a/D3D11Engine/D3D11PipelineStateObject.h b/D3D11Engine/D3D11PipelineStateObject.h new file mode 100644 index 00000000..51408ac0 --- /dev/null +++ b/D3D11Engine/D3D11PipelineStateObject.h @@ -0,0 +1,193 @@ +#pragma once +#include "pch.h" +#include +#include +#include "GothicGraphicsState.h" + +class D3D11VShader; +class D3D11PShader; +class D3D11GShader; +class D3D11HDShader; + +struct GothicBlendStateInfo; +struct GothicRasterizerStateInfo; +struct GothicDepthBufferStateInfo; + +// Mirrors D3D12_PRIMITIVE_TOPOLOGY_TYPE +enum class PrimitiveTopologyType : uint8_t { + Undefined = 0, + Point = 1, + Line = 2, + Triangle = 3, + Patch = 4 +}; + +/** Converts PrimitiveTopologyType to the most common D3D11 topology for that type */ +inline D3D11_PRIMITIVE_TOPOLOGY ToD3D11Topology( PrimitiveTopologyType type ) { + switch ( type ) { + case PrimitiveTopologyType::Point: return D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; + case PrimitiveTopologyType::Line: return D3D11_PRIMITIVE_TOPOLOGY_LINELIST; + case PrimitiveTopologyType::Triangle: return D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + case PrimitiveTopologyType::Patch: return D3D11_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST; + 
default: return D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; + } +} + +/** + * Immutable pipeline state object, modeled after D3D12_GRAPHICS_PIPELINE_STATE_DESC. + * + * Captures the full set of static pipeline configuration that in DX12 would be + * baked into a single ID3D12PipelineState: + * - Shader stages (VS, PS, GS, Hull/Domain) + * - Blend, Rasterizer, DepthStencil states + * - Primitive topology type + * - Sample mask / sample desc + * - Render-target and depth-stencil formats + * + * Once constructed the object is immutable. A hash is computed at creation + * time so that the PipelineStateCache can quickly detect redundant state sets. + */ +class D3D11PipelineStateObject { +public: + /** Descriptor used to build a PSO – fill this in then pass to the constructor. */ + struct Desc { + // --- Shader stages (nullable) ---------------------------------------- + std::shared_ptr VS; + std::shared_ptr PS; + std::shared_ptr GS; + std::shared_ptr HDS; // Hull + Domain (combined, matching existing codebase) + + // --- Fixed-function state -------------------------------------------- + GothicBlendStateInfo BlendState; + UINT SampleMask = 0xFFFFFFFF; + GothicRasterizerStateInfo RasterizerState; + GothicDepthBufferStateInfo DepthStencilState; + + // --- Input assembly -------------------------------------------------- + PrimitiveTopologyType TopologyType = PrimitiveTopologyType::Triangle; + + // --- Render target description (for future DX12) --------------------- + UINT NumRenderTargets = 1; + DXGI_FORMAT RTVFormats[8] = {}; + DXGI_FORMAT DSVFormat = DXGI_FORMAT_D32_FLOAT; + DXGI_SAMPLE_DESC SampleDesc = { 1, 0 }; + + Desc(); + }; + + explicit D3D11PipelineStateObject( const Desc& desc ); + + // --- Accessors (const, PSO is immutable) --------------------------------- + + size_t GetHash() const { return m_Hash; } + bool operator==( const D3D11PipelineStateObject& o ) const { return m_Hash == o.m_Hash; } + bool operator!=( const D3D11PipelineStateObject& o ) const { return 
m_Hash != o.m_Hash; } + + const std::shared_ptr& GetVS() const { return m_VS; } + const std::shared_ptr& GetPS() const { return m_PS; } + const std::shared_ptr& GetGS() const { return m_GS; } + const std::shared_ptr& GetHDS() const { return m_HDS; } + + const GothicBlendStateInfo& GetBlendState() const { return m_BlendState; } + const GothicRasterizerStateInfo& GetRasterizerState() const { return m_RasterizerState; } + const GothicDepthBufferStateInfo& GetDepthStencilState() const { return m_DepthStencilState; } + + UINT GetSampleMask() const { return m_SampleMask; } + PrimitiveTopologyType GetTopologyType() const { return m_TopologyType; } + D3D11_PRIMITIVE_TOPOLOGY GetD3D11Topology() const { return ToD3D11Topology( m_TopologyType ); } + + UINT GetNumRenderTargets() const { return m_NumRenderTargets; } + DXGI_FORMAT GetRTVFormat( UINT i ) const { return (i < 8) ? m_RTVFormats[i] : DXGI_FORMAT_UNKNOWN; } + DXGI_FORMAT GetDSVFormat() const { return m_DSVFormat; } + const DXGI_SAMPLE_DESC& GetSampleDesc() const { return m_SampleDesc; } + +private: + void ComputeHash(); + + // Shaders + std::shared_ptr m_VS; + std::shared_ptr m_PS; + std::shared_ptr m_GS; + std::shared_ptr m_HDS; + + // Fixed-function state (stored by value – small POD structs) + GothicBlendStateInfo m_BlendState; + UINT m_SampleMask; + GothicRasterizerStateInfo m_RasterizerState; + GothicDepthBufferStateInfo m_DepthStencilState; + + // Input assembly + PrimitiveTopologyType m_TopologyType; + + // Render target description + UINT m_NumRenderTargets; + DXGI_FORMAT m_RTVFormats[8]; + DXGI_FORMAT m_DSVFormat; + DXGI_SAMPLE_DESC m_SampleDesc; + + // Combined hash of the entire PSO + size_t m_Hash = 0; +}; + +/** + * Pipeline-state cache that tracks which D3D11 states are currently bound and + * performs the minimal set of API calls when switching to a new PSO. 
+ * + * Usage: + * cache.SetPipelineState(myPSO); // binds everything that changed + * + * Internally caches the D3D11 blend / rasterizer / depth-stencil state COM + * objects so they are created at most once per unique configuration. + */ +class D3D11PipelineStateCache { +public: + D3D11PipelineStateCache() = default; + + /** Initialise with the D3D11 device and immediate context. */ + void Init( ID3D11Device1* device, ID3D11DeviceContext1* context ); + + /** + * Apply a pipeline state object. Only the state that differs from the + * currently bound state will be set on the device context. + */ + void SetPipelineState( const D3D11PipelineStateObject& pso ); + + /** + * Mark all tracked state as unknown, forcing the next SetPipelineState + * to re-bind everything. Call this when external code (e.g. the Gothic + * engine) may have changed D3D11 state behind the cache's back. + */ + void Invalidate(); + + /** Release all cached D3D11 state objects. */ + void Clear(); + +private: + // --- Cached D3D11 state objects (keyed by Gothic state hash) ------------- + Microsoft::WRL::ComPtr GetOrCreateBlendState( const GothicBlendStateInfo& desc ); + Microsoft::WRL::ComPtr GetOrCreateRasterizerState( const GothicRasterizerStateInfo& desc ); + Microsoft::WRL::ComPtr GetOrCreateDepthStencilState( const GothicDepthBufferStateInfo& desc ); + + ID3D11Device1* m_Device = nullptr; + ID3D11DeviceContext1* m_Context = nullptr; + + // --- Currently bound state (tracked to skip redundant API calls) --------- + struct BoundState { + size_t PSOHash = 0; + size_t VSHash = 0; + size_t PSHash = 0; + size_t GSHash = 0; + size_t HDSHash = 0; + size_t BlendHash = 0; + size_t RasterizerHash = 0; + size_t DepthStencilHash = 0; + UINT SampleMask = 0xFFFFFFFF; + D3D11_PRIMITIVE_TOPOLOGY Topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; + }; + BoundState m_BoundState{}; + + // --- State object caches (one D3D11 object per unique hash) -------------- + std::unordered_map> m_BlendStates; + 
std::unordered_map> m_RasterizerStates; + std::unordered_map> m_DepthStencilStates; +}; diff --git a/D3D11Engine/D3D11ShaderManager.cpp b/D3D11Engine/D3D11ShaderManager.cpp index 483f3ead..3876e704 100644 --- a/D3D11Engine/D3D11ShaderManager.cpp +++ b/D3D11Engine/D3D11ShaderManager.cpp @@ -245,10 +245,16 @@ XRESULT D3D11ShaderManager::Init() { Shaders.push_back( ShaderInfo::make( "VS_ExInstancedObj.hlsl" ) .with_layout( 10 ) ); + Shaders.push_back( ShaderInfo::make("VS_ExInstancedObjIndirectAtlas.hlsl" ) + .with_layout( 12 ) ); Shaders.push_back( ShaderInfo::make( "VS_ExInstanced.hlsl" ) .with_layout( 4 ) ); + // World mesh atlas vertex shader (uses same layout 12: ExVertexStruct + uint instance remap) + Shaders.push_back( ShaderInfo::make( "VS_ExWorldAtlas.hlsl" ) + .with_layout( 12 ) ); + Shaders.push_back( ShaderInfo::make( "VS_GrassInstanced.hlsl" ) .with_layout( 9 ) ); @@ -360,6 +366,35 @@ XRESULT D3D11ShaderManager::Init() { Shaders.push_back( ShaderInfo::make( "PS_PortalDiffuse.hlsl" ) ); //forest portals, doors, etc. 
Shaders.push_back( ShaderInfo::make( "PS_WaterfallFoam.hlsl" ) ); //foam on at the base of waterfalls + + Shaders.push_back( ShaderInfo::make( "PS_DiffuseAtlas.hlsl") + .with_macros( makros ) ); // DIST_Distance + + makros.clear(); + m.Name = "NORMALMAPPING"; + m.Definition = "0"; + makros.push_back( m ); + m.Name = "ALPHATEST"; + m.Definition = "1"; + makros.push_back( m ); + + Shaders.push_back( ShaderInfo::make( "PS_DiffuseAtlas.hlsl" ) + .with_macros( makros ) ); // DIST_Distance + + makros.clear(); + m.Name = "NORMALMAPPING"; + m.Definition = "0"; + makros.push_back( m ); + m.Name = "ALPHATEST_SHADOWS"; + m.Definition = "1"; + makros.push_back( m ); + + Shaders.push_back( ShaderInfo::make( "PS_DiffuseAtlas.hlsl" ) + .with_macros( makros ) ); // DIST_Distance + + // World mesh atlas PS — flags-driven normal/FX/alpha-test in a single shader + makros.clear(); + Shaders.push_back( ShaderInfo::make( "PS_WorldAtlas.hlsl" ) ); // DIST_Distance makros.clear(); @@ -542,6 +577,10 @@ XRESULT D3D11ShaderManager::Init() { Shaders.push_back( ShaderInfo::make( "CS_LightCulling.hlsl" )); Shaders.push_back( ShaderInfo::make( "CS_TiledShading.hlsl" )); + + Shaders.push_back( ShaderInfo::make( "CS_CullVobs.hlsl" )); + + Shaders.push_back( ShaderInfo::make( "CS_BuildHiZ.hlsl" ) ); } return XR_SUCCESS; diff --git a/D3D11Engine/D3D11ShadowAtlas.cpp b/D3D11Engine/D3D11ShadowAtlas.cpp index c4bd7984..a24370a9 100644 --- a/D3D11Engine/D3D11ShadowAtlas.cpp +++ b/D3D11Engine/D3D11ShadowAtlas.cpp @@ -184,13 +184,13 @@ ID3D11ShaderResourceView* D3D11ShadowAtlas::GetShaderResourceView() const { return m_srv.Get(); } -void D3D11ShadowAtlas::BindToPixelShader( ID3D11DeviceContext1* context, UINT slot ) const { +void D3D11ShadowAtlas::BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) const { if ( m_srv ) { context->PSSetShaderResources( slot, 1, m_srv.GetAddressOf() ); } } -void D3D11ShadowAtlas::BindToVertexShader( ID3D11DeviceContext1* context, UINT slot ) const { +void 
D3D11ShadowAtlas::BindToVertexShader( ID3D11DeviceContext* context, UINT slot ) const { if ( m_srv ) { context->VSSetShaderResources( slot, 1, m_srv.GetAddressOf() ); } diff --git a/D3D11Engine/D3D11ShadowAtlas.h b/D3D11Engine/D3D11ShadowAtlas.h index e4ee63bc..ba385be7 100644 --- a/D3D11Engine/D3D11ShadowAtlas.h +++ b/D3D11Engine/D3D11ShadowAtlas.h @@ -68,8 +68,8 @@ class D3D11ShadowAtlas { /** Get the SRV for the atlas texture (Texture2D). */ ID3D11ShaderResourceView* GetShaderResourceView() const; - void BindToPixelShader( ID3D11DeviceContext1* context, UINT slot ) const; - void BindToVertexShader( ID3D11DeviceContext1* context, UINT slot ) const; + void BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) const; + void BindToVertexShader( ID3D11DeviceContext* context, UINT slot ) const; /** Get cascade 0 pixel size (the largest cascade). */ UINT GetCascade0Size() const { return m_cascade0Size; } diff --git a/D3D11Engine/D3D11ShadowMap.cpp b/D3D11Engine/D3D11ShadowMap.cpp index eb723a52..ee17842e 100644 --- a/D3D11Engine/D3D11ShadowMap.cpp +++ b/D3D11Engine/D3D11ShadowMap.cpp @@ -338,10 +338,6 @@ void D3D11ShadowMap::Init( Microsoft::WRL::ComPtr& device, Micros EnsureShadowMapBackend( s ); - for ( int i = 0; i < MAX_CSM_CASCADES; ++i ) { - m_RenderQueues[i] = std::make_unique( device.Get(), context.Get() ); - } - D3D11GraphicsEngineBase* engine = reinterpret_cast( Engine::GraphicsEngine ); // Create constantbuffer for the view-matrices @@ -385,7 +381,7 @@ void D3D11ShadowMap::Resize( int size ) { m_lastNumCascades = static_cast( atlasNumCascades ); } -void D3D11ShadowMap::BindToPixelShader( ID3D11DeviceContext1* context, UINT slot ) { +void D3D11ShadowMap::BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) { if ( m_useAtlas ) { if ( m_shadowAtlas ) m_shadowAtlas->BindToPixelShader( context, slot ); } else { @@ -393,11 +389,11 @@ void D3D11ShadowMap::BindToPixelShader( ID3D11DeviceContext1* context, UINT slot } } -void 
D3D11ShadowMap::BindSampler( ID3D11DeviceContext1* context, UINT slot ) { +void D3D11ShadowMap::BindSampler( ID3D11DeviceContext* context, UINT slot ) { if ( m_shadowmapSampler ) context->PSSetSamplers( slot, 1, m_shadowmapSampler.GetAddressOf() ); } -void D3D11ShadowMap::BindSamplerToCS( ID3D11DeviceContext1* context, UINT slot ) { +void D3D11ShadowMap::BindSamplerToCS( ID3D11DeviceContext* context, UINT slot ) { if ( m_shadowmapSampler ) context->CSSetSamplers( slot, 1, m_shadowmapSampler.GetAddressOf() ); } @@ -648,81 +644,6 @@ XRESULT D3D11ShadowMap::PrepareRender() } } - // Collect all VOBs inside our shadow draw distance (last frustum) - - static std::vector potentialCasters; - static std::vector _1; - static std::vector _2; - potentialCasters.reserve(1024); - potentialCasters.clear(); - - { - RndCullContext ctx; - LegacyRenderQueueProxy q(potentialCasters, _1, _2); - - ctx.queue = &q; - ctx.frustum = Frustum::AlwaysContainingFrustum(); - ctx.cameraPosition = m_WorldShadowPos; - ctx.stage = RenderStage::STAGE_DRAW_SHADOWS; - ctx.drawDistances.OutdoorVobs = 20000; - ctx.drawDistances.OutdoorVobsSmall = 20000; - - Engine::GAPI->CollectVisibleVobs( ctx ); - } - - auto invView = XMMatrixTranspose(XMLoadFloat4x4(&zCCamera::GetCamera()->GetTransformDX( zCCamera::ETransformType::TT_VIEW_INV ))); - auto camPos = invView.r[3]; - XMVECTOR camForward = XMVector3Normalize( invView.r[2]); - - for ( int i = 0; i < numCascades; ++i ) { - m_RenderQueues[i]->Reset(); - } - - if ( numCascades > 3 ) { - for ( auto vob : potentialCasters ) { - - auto boundingSphere = Frustum::BSphereFromzTBBox3D( vob->Vob->GetBBox() ); - if ( m_CascadeCRs[0].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[0]->GetVobs().push_back( vob ); - - if ( /*m_ShouldUpdateCascade[1] && */m_CascadeCRs[1].frustum.Intersects(boundingSphere) ) - m_RenderQueues[1]->GetVobs().push_back( vob ); - - if ( m_ShouldUpdateCascade[2] && m_CascadeCRs[2].frustum.Intersects( boundingSphere ) ) - 
m_RenderQueues[2]->GetVobs().push_back( vob ); - - if ( m_ShouldUpdateCascade[3] && m_CascadeCRs[3].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[3]->GetVobs().push_back( vob ); - } - } else if ( numCascades > 2 ) { - for ( auto vob : potentialCasters ) { - auto boundingSphere = Frustum::BSphereFromzTBBox3D( vob->Vob->GetBBox() ); - if ( m_CascadeCRs[0].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[0]->GetVobs().push_back( vob ); - - if ( /*m_ShouldUpdateCascade[1] && */m_CascadeCRs[1].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[1]->GetVobs().push_back( vob ); - - if ( m_ShouldUpdateCascade[2] && m_CascadeCRs[2].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[2]->GetVobs().push_back( vob ); - } - } else if ( numCascades > 1 ) { - for ( auto vob : potentialCasters ) { - auto boundingSphere = Frustum::BSphereFromzTBBox3D( vob->Vob->GetBBox() ); - if ( m_CascadeCRs[0].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[0]->GetVobs().push_back( vob ); - - if ( /*m_ShouldUpdateCascade[1] && */m_CascadeCRs[1].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[1]->GetVobs().push_back( vob ); - } - } else if ( numCascades > 0 ) { - for ( auto vob : potentialCasters ) { - auto boundingSphere = Frustum::BSphereFromzTBBox3D( vob->Vob->GetBBox() ); - if ( m_CascadeCRs[0].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[0]->GetVobs().push_back( vob ); - } - } - return XR_SUCCESS; } @@ -762,7 +683,7 @@ XRESULT D3D11ShadowMap::DrawPointlightShadows( std::vector& light auto& settings = Engine::GAPI->GetRendererState().RendererSettings; // Release any resources of not visible lights - for ( auto& it : Engine::GAPI->VobLightMap ) { + for ( auto& it : Engine::GAPI->VobLights_Sorted ) { if ( it.second->LightShadowBuffers && (!it.second->Vob->IsEnabled() || !it.second->VisibleInFrame) ) { if ( D3D11PointLight* pl = static_cast(it.second->LightShadowBuffers.get()) ) { @@ -980,7 +901,6 @@ XRESULT D3D11ShadowMap::DrawWorldShadow( ) 
RenderShadowmaps( renderParams ); Engine::GAPI->SetCameraReplacementPtr( nullptr ); - m_RenderQueues[cascadeIdx]->Reset(); } } @@ -996,7 +916,7 @@ XRESULT D3D11ShadowMap::DrawRainShadowmap() { auto graphicsEngine = reinterpret_cast(Engine::GraphicsEngine); auto _ = graphicsEngine->RecordGraphicsEvent( L"DrawRainShadowmap" ); - graphicsEngine->Effects->DrawRainShadowmap(); + return graphicsEngine->Effects->DrawRainShadowmap(); } return XR_SUCCESS; } @@ -1005,8 +925,10 @@ XRESULT D3D11ShadowMap::DrawPointlightLights( std::vector& lights, RenderToTextureBuffer& color, RenderToTextureBuffer& normals, - RenderToTextureBuffer& specular, - RenderToTextureBuffer& depthCopy + RenderToTextureBuffer& specular, + RenderToTextureBuffer& depthCopy, + ID3D11RenderTargetView* outputRTV, + ID3D11DepthStencilView* dsv ) { auto& settings = Engine::GAPI->GetRendererState().RendererSettings; @@ -1022,7 +944,9 @@ XRESULT D3D11ShadowMap::DrawLighting( RenderToTextureBuffer& color, RenderToTextureBuffer& normals, RenderToTextureBuffer& specular, - RenderToTextureBuffer& depthCopy) { + RenderToTextureBuffer& depthCopy, + ID3D11RenderTargetView* outputRTV, + ID3D11DepthStencilView* dsv) { auto graphicsEngine = reinterpret_cast(Engine::GraphicsEngine); auto& settings = Engine::GAPI->GetRendererState().RendererSettings; @@ -1039,7 +963,7 @@ XRESULT D3D11ShadowMap::DrawLighting( Engine::GAPI->SetFarPlane(static_cast(settings.SectionDrawRadius) * WORLD_SECTION_SIZE ); - DrawPointlightLights(lights, color, normals, specular, depthCopy); + DrawPointlightLights(lights, color, normals, specular, depthCopy, outputRTV, dsv); m_context->OMSetRenderTargets( 1, graphicsEngine->GetHDRBackBuffer().GetRenderTargetView().GetAddressOf(), nullptr ); @@ -1054,10 +978,9 @@ XRESULT D3D11ShadowMap::DrawLighting( srvs[0] = specular.GetShaderResView().Get(); m_context->PSSetShaderResources( 7, 1, srvs ); - DrawWorldLights(); + DrawWorldLights( outputRTV ); - m_context->OMSetRenderTargets( 1, 
graphicsEngine->GetHDRBackBuffer().GetRenderTargetView().GetAddressOf(), - graphicsEngine->GetDepthBuffer()->GetDepthStencilView().Get() ); + m_context->OMSetRenderTargets( 1, &outputRTV, dsv ); return XR_SUCCESS; } @@ -1121,7 +1044,6 @@ void D3D11ShadowMap::RenderShadowmaps( const RenderShadowmapsParams& params ) { m_context->OMSetRenderTargets( 1, params.DebugRTV.GetAddressOf(), dsvOverwrite.Get() ); Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = true; } - Engine::GAPI->GetRendererState().BlendState.SetDirty(); // Dont render shadows from the sun when it isn't on the sky if ( isNotWorldShadowMap || @@ -1170,20 +1092,17 @@ void D3D11ShadowMap::RenderShadowmaps( const RenderShadowmapsParams& params ) { WORLD_SECTION_SIZE ); } -XRESULT D3D11ShadowMap::DrawWorldLights() +XRESULT D3D11ShadowMap::DrawWorldLights(ID3D11RenderTargetView* outputRTV) { auto graphicsEngine = reinterpret_cast(Engine::GraphicsEngine); auto _ = graphicsEngine->RecordGraphicsEvent( L"DrawWorldLights" ); auto& settings = Engine::GAPI->GetRendererState().RendererSettings; Engine::GAPI->GetRendererState().BlendState.SetAdditiveBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::CF_COMPARISON_ALWAYS; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); // Modify light when raining float rain = Engine::GAPI->GetRainFXWeight(); @@ -1373,12 +1292,10 @@ void XM_CALLCONV D3D11ShadowMap::RenderShadowCube( Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = true; // Should be false, but needs to be true for SV_Depth to work - Engine::GAPI->GetRendererState().BlendState.SetDirty(); } else { m_context->OMSetRenderTargets( 1, debugRTV.GetAddressOf(), face.Get() ); 
Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = true; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); } // Always render shadowcube when dynamic shadows are enabled diff --git a/D3D11Engine/D3D11ShadowMap.h b/D3D11Engine/D3D11ShadowMap.h index e179f779..a8cef69e 100644 --- a/D3D11Engine/D3D11ShadowMap.h +++ b/D3D11Engine/D3D11ShadowMap.h @@ -119,11 +119,11 @@ class D3D11ShadowMap { } // Bind world shadowmap SRV to a pixel shader slot (binds entire cascade array) - void BindToPixelShader( ID3D11DeviceContext1* context, UINT slot ); + void BindToPixelShader( ID3D11DeviceContext* context, UINT slot ); // Bind the shadowmap sampler to the given slot - void BindSampler( ID3D11DeviceContext1* context, UINT slot ); - void BindSamplerToCS( ID3D11DeviceContext1* context, UINT slot ); + void BindSampler( ID3D11DeviceContext* context, UINT slot ); + void BindSamplerToCS( ID3D11DeviceContext* context, UINT slot ); XRESULT PrepareRender(); @@ -137,14 +137,18 @@ class D3D11ShadowMap { XRESULT DrawWorldShadow(); XRESULT DrawRainShadowmap(); XRESULT DrawPointlightLights(std::vector& lights, RenderToTextureBuffer& color, RenderToTextureBuffer& normals, RenderToTextureBuffer - & specular, RenderToTextureBuffer& depthCopy); + & specular, RenderToTextureBuffer& depthCopy, + ID3D11RenderTargetView* outputRTV, + ID3D11DepthStencilView* dsv ); /** Renders the shadowmaps for the sun using parameter struct */ void RenderShadowmaps( const RenderShadowmapsParams& params ); - XRESULT DrawWorldLights(); + XRESULT DrawWorldLights( ID3D11RenderTargetView* outputRTV ); XRESULT DrawLighting(std::vector& lights, RenderToTextureBuffer& color, RenderToTextureBuffer& normals, RenderToTextureBuffer - & specular, RenderToTextureBuffer& depthCopy); + & specular, RenderToTextureBuffer& depthCopy, + ID3D11RenderTargetView* outputRTV, + ID3D11DepthStencilView* dsv ); void XM_CALLCONV RenderShadowCube( DirectX::FXMVECTOR position, float range, @@ -168,7 +172,6 @@ class 
D3D11ShadowMap { /* 4 */ { 0.80f, 1.0f }, // Players should really want to use 4 cascades for best quality and furthest }; - D3D11RenderQueue* GetRenderQueue( int cascadeIndex ) { return m_RenderQueues[cascadeIndex].get(); } private: bool ShouldUseAtlas() const; void RecreateShadowSampler(); @@ -190,7 +193,6 @@ class D3D11ShadowMap { Microsoft::WRL::ComPtr m_shadowmapSampler; int m_lastNumCascades = 0; std::array m_CascadeCRs; - std::array, MAX_CSM_CASCADES> m_RenderQueues; std::vector m_CascadeSplits; std::array m_ShouldUpdateCascade; XMFLOAT3 m_WorldShadowPos; diff --git a/D3D11Engine/D3D11StructuredBuffer.h b/D3D11Engine/D3D11StructuredBuffer.h new file mode 100644 index 00000000..a47dcfd5 --- /dev/null +++ b/D3D11Engine/D3D11StructuredBuffer.h @@ -0,0 +1,135 @@ +#pragma once + +#include "pch.h" +#include + +// Templated structured buffer for GPU compute/shader access +template +class D3D11StructuredBuffer { +public: + D3D11StructuredBuffer() : ElementCount( 0 ), MaxElementCount( 0 ) {} + + ~D3D11StructuredBuffer() = default; + + // Initialize the buffer with a maximum capacity + HRESULT Init( ID3D11Device* device, UINT maxElements, bool cpuWrite = true, bool gpuWrite = false ) { + MaxElementCount = maxElements; + ElementCount = 0; + + D3D11_BUFFER_DESC desc = {}; + desc.ByteWidth = sizeof( T ) * maxElements; + desc.StructureByteStride = sizeof( T ); + desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; + + if ( cpuWrite ) { + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + } else if ( gpuWrite ) { + desc.Usage = D3D11_USAGE_DEFAULT; + desc.CPUAccessFlags = 0; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE + | (device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0 ? 
D3D11_BIND_UNORDERED_ACCESS : 0); + } else { + desc.Usage = D3D11_USAGE_DEFAULT; + desc.CPUAccessFlags = 0; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + } + + HRESULT hr = device->CreateBuffer( &desc, nullptr, Buffer.GetAddressOf() ); + if ( FAILED( hr ) ) return hr; + + // Create SRV + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_UNKNOWN; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; + srvDesc.Buffer.FirstElement = 0; + srvDesc.Buffer.NumElements = maxElements; + + hr = device->CreateShaderResourceView( Buffer.Get(), &srvDesc, SRV.GetAddressOf() ); + if ( FAILED( hr ) ) return hr; + + // Create UAV if GPU writable + if ( gpuWrite ) { + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = DXGI_FORMAT_UNKNOWN; + uavDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; + uavDesc.Buffer.FirstElement = 0; + uavDesc.Buffer.NumElements = maxElements; + + hr = device->CreateUnorderedAccessView( Buffer.Get(), &uavDesc, UAV.GetAddressOf() ); + if ( FAILED( hr ) ) return hr; + } + + return S_OK; + } + + // Update buffer contents (for dynamic buffers) + HRESULT UpdateBuffer( ID3D11DeviceContext* context, const T* data, UINT count ) { + if ( count > MaxElementCount ) { + LogError() << "StructuredBuffer overflow: " << count << " > " << MaxElementCount; + count = MaxElementCount; + } + + ElementCount = count; + + D3D11_MAPPED_SUBRESOURCE mapped; + HRESULT hr = context->Map( Buffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped ); + if ( FAILED( hr ) ) return hr; + + memcpy( mapped.pData, data, sizeof( T ) * count ); + context->Unmap( Buffer.Get(), 0 ); + + return S_OK; + } + + // Update buffer contents (for default buffers) + void UpdateBufferDefault( ID3D11DeviceContext* context, const T* data, UINT count ) { + if ( count > MaxElementCount ) { + LogError() << "StructuredBuffer overflow: " << count << " > " << MaxElementCount; + count = MaxElementCount; + } + ElementCount = count; + context->UpdateSubresource( Buffer.Get(), 0, 
nullptr, data, 0, 0 ); + } + + // Bind to vertex shader + void BindToVertexShader( ID3D11DeviceContext* context, UINT slot ) { + context->VSSetShaderResources( slot, 1, SRV.GetAddressOf() ); + } + + // Bind to pixel shader + void BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) { + context->PSSetShaderResources( slot, 1, SRV.GetAddressOf() ); + } + + // Unbind from vertex shader + void UnbindFromVertexShader( ID3D11DeviceContext* context, UINT slot ) { + ID3D11ShaderResourceView* nullSRV = nullptr; + context->VSSetShaderResources( slot, 1, &nullSRV ); + } + + // Bind to compute shader (SRV) + void BindToComputeShader( ID3D11DeviceContext* context, UINT slot ) { + context->CSSetShaderResources( slot, 1, SRV.GetAddressOf() ); + } + + // Unbind from compute shader + void UnbindFromComputeShader( ID3D11DeviceContext* context, UINT slot ) { + ID3D11ShaderResourceView* nullSRV = nullptr; + context->CSSetShaderResources( slot, 1, &nullSRV ); + } + + UINT GetElementCount() const { return ElementCount; } + UINT GetMaxElementCount() const { return MaxElementCount; } + ID3D11Buffer* GetBuffer() const { return Buffer.Get(); } + ID3D11ShaderResourceView* GetSRV() const { return SRV.Get(); } + ID3D11UnorderedAccessView* GetUAV() const { return UAV.Get(); } + +private: + Microsoft::WRL::ComPtr Buffer; + Microsoft::WRL::ComPtr SRV; + Microsoft::WRL::ComPtr UAV; + UINT ElementCount; + UINT MaxElementCount; +}; diff --git a/D3D11Engine/D3D11TextureAtlasManager.h b/D3D11Engine/D3D11TextureAtlasManager.h new file mode 100644 index 00000000..660294a4 --- /dev/null +++ b/D3D11Engine/D3D11TextureAtlasManager.h @@ -0,0 +1,277 @@ +#pragma once +#include "pch.h" + +#include +#include +#include +#include +#include +#include "ConstantBufferStructs.h" + +// Internal struct for bin packing +struct PackItem { + int originalIndex; + UINT width; + UINT height; + UINT x, y, slice; + ID3D11Texture2D* texture; + D3D11_TEXTURE2D_DESC desc; +}; + +class TextureManager { +private: + // 
Helper to align sizes for power-of-two mip boundaries + static UINT Align( UINT value, UINT alignment ) { + return (value + alignment - 1) & ~(alignment - 1); + } + + // Returns the block size for BC compressed formats (4), or 1 for uncompressed + static UINT GetBlockSize( DXGI_FORMAT fmt ) { + switch ( fmt ) { + case DXGI_FORMAT_BC1_UNORM: case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC2_UNORM: case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_UNORM: case DXGI_FORMAT_BC3_UNORM_SRGB: + return 4; + default: return 1; + } + } + + // Generates the mip levels that are missing from the source texture (item.desc.MipLevels < mipLevels) + // by decompressing the last source mip, running box-filter downsampling, re-compressing to + // atlasFormat, and uploading each new level into the atlas via a temporary immutable texture. + static void GenerateMissingMips( + ID3D11Device* device, ID3D11DeviceContext* context, + ID3D11Texture2D* atlasTextureArray, + const PackItem& item, DXGI_FORMAT atlasFormat, UINT mipLevels ) + { + // Capture the source texture to CPU memory (creates an internal staging copy) + DirectX::ScratchImage captured; + if ( FAILED( DirectX::CaptureTexture( device, context, item.texture, captured ) ) ) + return; + + // Grab the last available mip as the downsampling base + const DirectX::Image* lastMipImg = captured.GetImage( item.desc.MipLevels - 1, 0, 0 ); + if ( !lastMipImg ) return; + + // GenerateMipMaps requires uncompressed input — decompress BC textures first + DirectX::ScratchImage decompressed; + const DirectX::Image* baseImg = lastMipImg; + if ( DirectX::IsCompressed( lastMipImg->format ) ) { + if ( FAILED( DirectX::Decompress( *lastMipImg, DXGI_FORMAT_R8G8B8A8_UNORM, decompressed ) ) ) + return; + baseImg = decompressed.GetImage( 0, 0, 0 ); + } + + // Generate: level 0 = base (already copied to atlas), levels 1..N = the missing mips + UINT levelsToGen = mipLevels - item.desc.MipLevels + 1; + DirectX::ScratchImage mipChain; + if ( 
FAILED( DirectX::GenerateMipMaps( *baseImg, DirectX::TEX_FILTER_BOX, levelsToGen, mipChain ) ) ) + return; + + // Re-compress the generated levels back to the atlas BC format. + // Try GPU-accelerated compression first; fall back to CPU if unsupported. + const DirectX::ScratchImage* finalChain = &mipChain; + DirectX::ScratchImage recompressed; + if ( DirectX::IsCompressed( atlasFormat ) ) { + HRESULT hr = DirectX::Compress( device, + mipChain.GetImages(), mipChain.GetImageCount(), mipChain.GetMetadata(), + atlasFormat, DirectX::TEX_COMPRESS_DEFAULT, DirectX::TEX_ALPHA_WEIGHT_DEFAULT, + recompressed ); + if ( FAILED( hr ) ) { + // GPU BC compression not supported on this hardware — use CPU path + recompressed = DirectX::ScratchImage{}; + if ( FAILED( DirectX::Compress( + mipChain.GetImages(), mipChain.GetImageCount(), mipChain.GetMetadata(), + atlasFormat, DirectX::TEX_COMPRESS_DEFAULT, DirectX::TEX_ALPHA_WEIGHT_DEFAULT, + recompressed ) ) ) + return; + } + finalChain = &recompressed; + } + + // Upload each new mip via a temporary immutable texture + CopySubresourceRegion + for ( UINT mip = item.desc.MipLevels; mip < mipLevels; ++mip ) { + // chainIdx 0 = the base (already in atlas), so start at 1 + UINT chainIdx = mip - item.desc.MipLevels + 1; + const DirectX::Image* src = finalChain->GetImage( chainIdx, 0, 0 ); + if ( !src || !src->pixels ) continue; + + // BC formats require texture dimensions to be multiples of the block size (4). + // Small mips can be sub-block, so align up to avoid CREATETEXTURE2D_INVALIDDIMENSIONS. 
+ UINT bsz = GetBlockSize( atlasFormat ); + D3D11_TEXTURE2D_DESC tmpDesc = {}; + tmpDesc.Width = Align( (UINT)src->width, bsz ); + tmpDesc.Height = Align( (UINT)src->height, bsz ); + tmpDesc.MipLevels = 1; + tmpDesc.ArraySize = 1; + tmpDesc.Format = src->format; + tmpDesc.SampleDesc.Count = 1; + tmpDesc.Usage = D3D11_USAGE_IMMUTABLE; + tmpDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + D3D11_SUBRESOURCE_DATA initData = {}; + initData.pSysMem = src->pixels; + initData.SysMemPitch = (UINT)src->rowPitch; + + ID3D11Texture2D* tmpTex = nullptr; + if ( SUCCEEDED( device->CreateTexture2D( &tmpDesc, &initData, &tmpTex ) ) ) { + UINT mipX = item.x >> mip; + UINT mipY = item.y >> mip; + UINT dstSub = D3D11CalcSubresource( mip, item.slice, mipLevels ); + D3D11_BOX box = { 0, 0, 0, tmpDesc.Width, tmpDesc.Height, 1 }; + context->CopySubresourceRegion( atlasTextureArray, dstSub, mipX, mipY, 0, tmpTex, 0, &box ); + tmpTex->Release(); + } + } + } + +public: + struct AtlasResult { + ID3D11Texture2D* atlasTextureArray = nullptr; + ID3D11ShaderResourceView* atlasSRV = nullptr; + std::vector descriptors; + + void Destroy() { + SAFE_RELEASE( atlasSRV ); + SAFE_RELEASE( atlasTextureArray ); + descriptors.clear(); + } + }; + + static AtlasResult CreateAtlasArray( ID3D11Device* device, ID3D11DeviceContext* context, + std::basic_string_view sourceTextures, + // const std::vector& sourceTextures, + UINT atlasSize = 2048, UINT mipLevels = 6 ) + { + if ( sourceTextures.empty() ) return {}; + + AtlasResult result; + result.descriptors.resize( sourceTextures.size() ); + + // Determine format from first texture for alignment calculation. + // For BC formats (blockSize=4), coordinates must remain block-aligned at every mip level. 
+ D3D11_TEXTURE2D_DESC firstDesc; + sourceTextures[0]->GetDesc( &firstDesc ); + DXGI_FORMAT atlasFormat = firstDesc.Format; + + const UINT blockSize = GetBlockSize( atlasFormat ); + const UINT MipAlignment = blockSize * (1 << (mipLevels - 1)); + + std::vector items; + items.reserve( sourceTextures.size() ); + + // 1. Extract info and validate + for ( size_t i = 0; i < sourceTextures.size(); ++i ) { + D3D11_TEXTURE2D_DESC desc; + sourceTextures[i]->GetDesc( &desc ); + + if ( desc.Format != atlasFormat ) { + // For a Texture2DArray, all formats must match. + throw std::runtime_error( "All textures must have the same DXGI_FORMAT." ); + } + + items.push_back( { (int)i, desc.Width, desc.Height, 0, 0, 0, sourceTextures[i], desc}); + } + + // 2. Sort by height descending for optimal shelf-packing + std::sort( items.begin(), items.end(), []( const PackItem& a, const PackItem& b ) { + return a.height > b.height; + } ); + + // 3. CPU Bin Packing (Shelf Packing Algorithm) + UINT currentX = 0, currentY = 0, currentShelfHeight = 0, currentSlice = 0; + + for ( auto& item : items ) { + UINT alignedW = Align( item.width, MipAlignment ); + UINT alignedH = Align( item.height, MipAlignment ); + + // Move to next shelf if it doesn't fit horizontally + if ( currentX + alignedW > atlasSize ) { + currentX = 0; + currentY += Align( currentShelfHeight, MipAlignment ); + currentShelfHeight = 0; + } + + // Move to next array slice if it doesn't fit vertically + if ( currentY + alignedH > atlasSize ) { + currentSlice++; + currentX = 0; + currentY = 0; + currentShelfHeight = 0; + } + + item.x = currentX; + item.y = currentY; + item.slice = currentSlice; + + currentX += alignedW; + currentShelfHeight = std::max( currentShelfHeight, alignedH ); + } + + UINT totalSlices = currentSlice + 1; + + // 4. 
Create the target Texture2DArray + D3D11_TEXTURE2D_DESC arrayDesc = {}; + arrayDesc.Width = atlasSize; + arrayDesc.Height = atlasSize; + arrayDesc.MipLevels = mipLevels; + arrayDesc.ArraySize = totalSlices; + arrayDesc.Format = atlasFormat; + arrayDesc.SampleDesc.Count = 1; + arrayDesc.SampleDesc.Quality = 0; + arrayDesc.Usage = D3D11_USAGE_DEFAULT; + arrayDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + if ( FAILED( device->CreateTexture2D( &arrayDesc, nullptr, &result.atlasTextureArray ) ) ) { + throw std::runtime_error( "Failed to create Texture2DArray atlas." ); + } + + // 5. GPU CopySubresourceRegion (Extremely fast, zero CPU-readback) + + for ( const auto& item : items ) { + UINT maxMipsToCopy = std::min( item.desc.MipLevels, mipLevels ); + + for ( UINT mip = 0; mip < maxMipsToCopy; ++mip ) { + // Calculate scaled coordinates for the current mip level + UINT mipX = item.x >> mip; + UINT mipY = item.y >> mip; + + // Mip source & destination indices + UINT srcSub = D3D11CalcSubresource( mip, 0, item.desc.MipLevels ); + UINT dstSub = D3D11CalcSubresource( mip, item.slice, mipLevels ); + + context->CopySubresourceRegion( + result.atlasTextureArray, dstSub, + mipX, mipY, 0, + item.texture, srcSub, + nullptr // nullptr means copy the whole subresource + ); + } + + // 5b. Fill missing MIP levels using DirectXTex bilinear downsampling + re-compression. + if ( item.desc.MipLevels < mipLevels ) + GenerateMissingMips( device, context, result.atlasTextureArray, item, atlasFormat, mipLevels ); + + // Write out descriptors in the *original* input order + TextureDescriptor& outDesc = result.descriptors[item.originalIndex]; + outDesc.slice = item.slice; + outDesc.uStart = (float)item.x / atlasSize; + outDesc.vStart = (float)item.y / atlasSize; + outDesc.uEnd = (float)(item.x + item.width) / atlasSize; + outDesc.vEnd = (float)(item.y + item.height) / atlasSize; + } + + // 6. 
Create SRV + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = atlasFormat; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; + srvDesc.Texture2DArray.MostDetailedMip = 0; + srvDesc.Texture2DArray.MipLevels = mipLevels; + srvDesc.Texture2DArray.FirstArraySlice = 0; + srvDesc.Texture2DArray.ArraySize = totalSlices; + + device->CreateShaderResourceView( result.atlasTextureArray, &srvDesc, &result.atlasSRV); + + return result; + } +}; diff --git a/D3D11Engine/D3D11VobAtlasPass.cpp b/D3D11Engine/D3D11VobAtlasPass.cpp new file mode 100644 index 00000000..faef4d0d --- /dev/null +++ b/D3D11Engine/D3D11VobAtlasPass.cpp @@ -0,0 +1,670 @@ +#include "D3D11VobAtlasPass.h" +#include "D3D11GraphicsEngine.h" + +#include "D3D11ShaderManager.h" +#include "D3D11VShader.h" +#include "D3D11PShader.h" +#include "D3D11CShader.h" +#include "D3D11ConstantBuffer.h" +#include "GothicAPI.h" +#include "GSky.h" +#include "RenderToTextureBuffer.h" +#include "WorldObjects.h" +#include "VertexTypes.h" +#include "zCTexture.h" +#include "zCMaterial.h" +#include "zCVob.h" +#include "zCVisual.h" + +#include +#include + +// ----- globals defined in D3D11GraphicsEngine.cpp ----- +extern bool SupportTextureAtlases; +extern float vobAnimation_WindStrength; +namespace { + constexpr DXGI_FORMAT VERTEX_INDEX_DXGI_FORMAT = sizeof( VERTEX_INDEX ) == sizeof( unsigned short ) ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT;
+}
+
+typedef void( __cdecl* PFN_DRAWMULTIINDEXEDINSTANCEDINDIRECT )(
+    ID3D11DeviceContext* context, unsigned int drawCount,
+    ID3D11Buffer* buffer, unsigned int alignedByteOffsetForArgs,
+    unsigned int alignedByteStrideForArgs );
+extern PFN_DRAWMULTIINDEXEDINSTANCEDINDIRECT DrawMultiIndexedInstancedIndirect;
+
+// -------------------------------------------------------
+
+D3D11VobAtlasPass::D3D11VobAtlasPass( D3D11GraphicsEngine* engine )
+    : m_Engine( engine ) {
+}
+
+// ============================================================
+// Build – entry point called from OnWorldLoaded
+//
+// Tears down all state of the previously loaded world and then
+// rebuilds atlases, merged geometry and the GPU culling buffers.
+// Returns early (leaving the pass in its "not ready" state) when
+// atlas rendering is unsupported or disabled in the renderer
+// settings, or when no texture could be placed in an atlas.
+// ============================================================
+void D3D11VobAtlasPass::Build() {
+    // Reset everything: CPU-side bookkeeping first ...
+    for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ )
+        m_TextureAtlasses[(DXGI_FORMAT)i].Destroy();
+    m_TextureAtlasLookup.clear();
+    m_AtlasDrawGroups.clear();
+    m_VobGPUDataCPU.clear();
+    m_VobToGPUIndex.clear();
+    m_MergedArgsReset.clear();
+    m_TotalMaxInstances = 0;
+
+    // ... then the GPU resources of the previous world. The build steps
+    // below only overwrite these when they run to completion, and Draw()
+    // uses their presence as its readiness check, so buffers left over
+    // from an earlier world must not survive a rebuild that bails out early.
+    // Releasing them here also frees the memory when the feature is disabled.
+    m_StaticGlobalVertexBuffer.reset();
+    m_StaticGlobalIndexBuffer.reset();
+    m_GlobalInstanceIdBuffer.reset();
+    m_VobGPUBuffer.reset();
+    m_SubmeshGPUBuffer.reset();
+    m_InstanceBufferGPU.reset();
+    m_MergedIndirectArgs.reset();
+    m_IndirectArgsTemplate.Reset();
+    m_CullConstantBuffer.reset();
+
+    if ( !SupportTextureAtlases ||
+        !Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs ) {
+        return;
+    }
+
+    BuildTextureAtlasses();
+
+    // Nothing was atlased, so there is no geometry to merge or cull.
+    if ( m_TextureAtlasLookup.empty() )
+        return;
+
+    BuildGeometryBuffers();
+    BuildGPUCullingBuffers();
+}
+
+// ============================================================
+// BuildTextureAtlasses
+// ============================================================
+void D3D11VobAtlasPass::BuildTextureAtlasses() {
+    struct TextureInfo {
+        zCTexture* gothicTexture;
+        DXGI_FORMAT Format;
+        Microsoft::WRL::ComPtr Texture2D;
+    };
+
+    std::unordered_set seenTextures;
+    std::vector uniqueTextures;
+
+    for ( auto [_, vobInfo] : Engine::GAPI->GetStaticMeshVisuals() ) {
+        for ( auto& byTex : vobInfo->MeshesByTexture ) {
+            zCTexture* tex = byTex.first.Texture;
+
+            if ( !tex ) {
+                auto vis = vobInfo->Visual;
+                LogError()
+                    << "Texture not found for visual " << vobInfo->VisualName
+                    << " Visual Type: " << vis->GetVisualType();
+
+                continue;
+            }
+
+ if ( !seenTextures.insert( tex ).second ) { + continue; + } + + auto cachedState = tex->CacheIn( -1 ); + if ( cachedState != zRES_CACHED_IN ) { + LogError() << "Texture " << tex->GetName() << " was not cached in"; + continue; + } + + auto surface = tex->GetSurface(); + if ( !surface || !surface->IsSurfaceReady() ) { + LogError() << "Texture " << tex->GetName() << " surface not ready"; + continue; + } + + auto engineTex = surface->GetEngineTexture(); + if ( !engineTex ) { + LogError() << "Texture " << tex->GetName() << " no engine texture"; + continue; + } + + D3D11_TEXTURE2D_DESC desc; + engineTex->GetTextureObject()->GetDesc( &desc ); + if ( desc.Format < 0 || desc.Format >= TEXTURE_ATLAS_MAX ) { + LogError() << "Texture " << tex->GetName() << " has unsupported format for atlas: " << desc.Format; + continue; + } + LogInfo() << "Texture for atlas: " << tex->GetName() << " Format: " << desc.Format; + uniqueTextures.push_back( { tex, desc.Format, engineTex->GetTextureObject() } ); + } + } + + // Sort by format so same-format textures are contiguous + std::sort( uniqueTextures.begin(), uniqueTextures.end(), + []( const TextureInfo& a, const TextureInfo& b ) { return a.Format < b.Format; } ); + + // Create one Texture2DArray atlas per contiguous format range + size_t rangeStart = 0; + while ( rangeStart < uniqueTextures.size() ) { + DXGI_FORMAT fmt = uniqueTextures[rangeStart].Format; + size_t rangeEnd = rangeStart; + while ( rangeEnd < uniqueTextures.size() && uniqueTextures[rangeEnd].Format == fmt ) + rangeEnd++; + + std::vector texPtrs; + texPtrs.reserve( rangeEnd - rangeStart ); + for ( size_t i = rangeStart; i < rangeEnd; i++ ) + texPtrs.push_back( uniqueTextures[i].Texture2D.Get() ); + + std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); + TextureManager::AtlasResult atlas = TextureManager::CreateAtlasArray( + m_Engine->GetDevice().Get(), m_Engine->GetContext().Get(), txView, 2048, 6 ); + + for ( size_t i = 0; i < texPtrs.size(); i++ ) { + 
m_TextureAtlasLookup[uniqueTextures[rangeStart + i].gothicTexture] = { + fmt, atlas.descriptors[i] + }; + } + m_TextureAtlasses[fmt] = atlas; + rangeStart = rangeEnd; + } + + LogInfo() << "VOB Atlas: " << uniqueTextures.size() << " unique textures, " + << m_TextureAtlasLookup.size() << " mapped"; +} + +// ============================================================ +// BuildGeometryBuffers +// ============================================================ +void D3D11VobAtlasPass::BuildGeometryBuffers() { + std::vector allVertices; + std::vector allIndices; + std::map groupsByFormat; + std::unordered_set processedMeshes; + + // Pre-count to avoid incremental reallocation + { + size_t totalVertices = 0, totalIndices = 0; + std::unordered_set counted; + for ( auto const& [proto, visual] : Engine::GAPI->GetStaticMeshVisuals() ) { + for ( auto const& [meshKey, meshList] : visual->MeshesByTexture ) { + if ( m_TextureAtlasLookup.find( meshKey.Texture ) == m_TextureAtlasLookup.end() ) + continue; + for ( MeshInfo* mi : meshList ) { + if ( counted.insert( mi ).second ) { + totalVertices += mi->Vertices.size(); + totalIndices += mi->Indices.size(); + } + } + } + } + allVertices.reserve( totalVertices ); + allIndices.reserve( totalIndices ); + } + + for ( auto const& [proto, visual] : Engine::GAPI->GetStaticMeshVisuals() ) { + for ( auto const& [meshKey, meshList] : visual->MeshesByTexture ) { + auto it = m_TextureAtlasLookup.find( meshKey.Texture ); + if ( it == m_TextureAtlasLookup.end() ) { + LogWarn() << "Texture for mesh not found: " << (meshKey.Texture ? 
meshKey.Texture->GetName() : "unknown"); + continue; + } + + const TextureAtlasLookup& lookup = it->second; + auto& group = groupsByFormat[lookup.atlasFormat]; + group.format = lookup.atlasFormat; + + for ( MeshInfo* mi : meshList ) { + if ( !processedMeshes.insert( mi ).second ) + continue; + + UINT baseVertex = static_cast(allVertices.size()); + UINT startIndex = static_cast(allIndices.size()); + + allVertices.insert( allVertices.end(), mi->Vertices.begin(), mi->Vertices.end() ); + allIndices.insert( allIndices.end(), mi->Indices.begin(), mi->Indices.end() ); + + StaticSubmeshEntry entry; + entry.indexCount = static_cast(mi->Indices.size()); + entry.startIndexLocation = startIndex; + entry.baseVertexLocation = static_cast(baseVertex); + entry.atlasDesc = lookup.descriptor; + entry.visual = visual; + group.submeshes.push_back( entry ); + + D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS args = {}; + args.IndexCountPerInstance = entry.indexCount; + args.InstanceCount = 0; + args.StartIndexLocation = entry.startIndexLocation; + args.BaseVertexLocation = entry.baseVertexLocation; + args.StartInstanceLocation = 0; + group.indirectArgs.push_back( args ); + } + } + } + + if ( allVertices.empty() ) { + LogWarn() << "D3D11VobAtlasPass::BuildGeometryBuffers: No vertices to process"; + return; + } + + m_StaticGlobalVertexBuffer = std::make_unique(); + m_StaticGlobalVertexBuffer->Init( + allVertices.data(), + static_cast(allVertices.size() * sizeof( ExVertexStruct )), + D3D11VertexBuffer::B_VERTEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + m_StaticGlobalIndexBuffer = std::make_unique(); + m_StaticGlobalIndexBuffer->Init( + allIndices.data(), + static_cast(allIndices.size() * sizeof( VERTEX_INDEX )), + D3D11VertexBuffer::B_INDEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + UINT maxInstanceIds = static_cast(m_Engine->m_StaticVobs.size() * 4); + if ( maxInstanceIds < 4096 ) + maxInstanceIds = 4096; + std::vector 
instanceIds( maxInstanceIds ); + for ( uint32_t i = 0; i < maxInstanceIds; i++ ) + instanceIds[i] = i; + + m_GlobalInstanceIdBuffer = std::make_unique(); + m_GlobalInstanceIdBuffer->Init( + instanceIds.data(), + static_cast(instanceIds.size() * sizeof( uint32_t )), + D3D11VertexBuffer::B_VERTEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + m_AtlasDrawGroups.clear(); + for ( auto& [fmt, group] : groupsByFormat ) { + if ( group.indirectArgs.empty() ) + continue; + + UINT bufSize = static_cast(group.indirectArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); + group.indirectBuffer = std::make_unique(); + group.indirectBuffer->Init( + group.indirectArgs.data(), bufSize, + D3D11IndirectBuffer::B_VERTEXBUFFER, + D3D11IndirectBuffer::U_DYNAMIC, + D3D11IndirectBuffer::CA_WRITE ); + + m_AtlasDrawGroups.push_back( std::move( group ) ); + } + + LogInfo() << "VOB Atlas geometry: " << allVertices.size() << " vertices, " + << allIndices.size() << " indices, " + << m_AtlasDrawGroups.size() << " atlas groups, " + << processedMeshes.size() << " unique submeshes"; +} + +// ============================================================ +// BuildGPUCullingBuffers +// ============================================================ +void D3D11VobAtlasPass::BuildGPUCullingBuffers() { + if ( m_AtlasDrawGroups.empty() || m_Engine->m_StaticVobs.empty() ) + return; + + // --- 1. Build visual -> vob-count mapping --- + std::unordered_map vobsPerVisual; + std::unordered_map> vobIndicesByVisual; + + for ( size_t i = 0; i < m_Engine->m_StaticVobs.size(); i++ ) { + auto* visual = reinterpret_cast(m_Engine->m_StaticVobs[i]->VisualInfo); + vobsPerVisual[visual]++; + vobIndicesByVisual[visual].push_back( i ); + } + + // --- 2. 
Build merged indirect args + SubmeshGPUData --- + std::vector mergedArgs; + std::unordered_map> visualSubmeshMap; + { + size_t totalSubmeshes = 0; + for ( const auto& group : m_AtlasDrawGroups ) totalSubmeshes += group.submeshes.size(); + mergedArgs.reserve( totalSubmeshes ); + } + + UINT runningInstanceOffset = 0; + UINT globalArgIndex = 0; + + for ( auto& group : m_AtlasDrawGroups ) { + group.mergedArgsOffset = static_cast(mergedArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); + group.mergedArgsCount = static_cast(group.indirectArgs.size()); + + for ( size_t si = 0; si < group.submeshes.size(); si++ ) { + const auto& submesh = group.submeshes[si]; + MeshVisualInfo* visual = submesh.visual; + UINT maxInstances = vobsPerVisual.count( visual ) ? vobsPerVisual[visual] : 0; + + D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS args = {}; + args.IndexCountPerInstance = submesh.indexCount; + args.InstanceCount = 0; + args.StartIndexLocation = submesh.startIndexLocation; + args.BaseVertexLocation = submesh.baseVertexLocation; + args.StartInstanceLocation = runningInstanceOffset; + mergedArgs.push_back( args ); + + SubmeshGPUData smGPU = {}; + smGPU.slice = submesh.atlasDesc.slice; + smGPU.uStart = submesh.atlasDesc.uStart; + smGPU.vStart = submesh.atlasDesc.vStart; + smGPU.uEnd = submesh.atlasDesc.uEnd; + smGPU.vEnd = submesh.atlasDesc.vEnd; + smGPU.argIndex = globalArgIndex; + smGPU.instanceBaseOffset = runningInstanceOffset; + smGPU.globalSourceIndex = 0; + + visualSubmeshMap[visual].push_back( smGPU ); + + runningInstanceOffset += maxInstances; + globalArgIndex++; + } + } + + m_TotalMaxInstances = runningInstanceOffset; + + // --- 3. 
Flatten per-visual submesh entries --- + struct VisualSubmeshRange { UINT start; UINT count; }; + std::unordered_map visualSubmeshRanges; + std::vector submeshGPU; + submeshGPU.reserve( mergedArgs.size() ); + + for ( auto& [visual, entries] : visualSubmeshMap ) { + UINT start = static_cast(submeshGPU.size()); + submeshGPU.insert( submeshGPU.end(), entries.begin(), entries.end() ); + visualSubmeshRanges[visual] = { start, static_cast(entries.size()) }; + } + + // --- 4. Build VobGPUData --- + std::vector vobGPU; + vobGPU.reserve( m_Engine->m_StaticVobs.size() ); + + for ( size_t i = 0; i < m_Engine->m_StaticVobs.size(); i++ ) { + VobInfo* v = m_Engine->m_StaticVobs[i]; + auto* visual = reinterpret_cast(v->VisualInfo); + + VobGPUData data = {}; + DirectX::BoundingBox bb = Frustum::BBoxFromzTBBox3D( v->Vob->GetBBox() ); + data.aabbCenter = bb.Center; + data.aabbExtent = bb.Extents; + data.world = v->WorldMatrix; + data.prevWorld = v->WorldMatrix; + data.color = v->GroundColor; + + zTAnimationMode aniMode = v->Vob->GetVisualAniMode(); + if ( aniMode != zVISUAL_ANIMODE_NONE ) { + data.aniModeStrength = v->Vob->GetVisualAniModeStrength(); + data.canBeAffectedByPlayer = (!v->Vob->GetDynColl() ? 1.0f : 0.0f); + } else { + data.aniModeStrength = 0.0f; + data.canBeAffectedByPlayer = 0.0f; + } + + data.minHeight = visual->BBox.Min.y; + data.maxHeight = visual->BBox.Max.y; + + auto it = visualSubmeshRanges.find( visual ); + if ( it != visualSubmeshRanges.end() ) { + data.submeshStart = it->second.start; + data.submeshCount = it->second.count; + } + vobGPU.push_back( data ); + } + + // Keep CPU-side copy and build vob-pointer lookup for runtime removal + m_VobGPUDataCPU = vobGPU; + m_VobToGPUIndex.clear(); + m_VobToGPUIndex.reserve( m_Engine->m_StaticVobs.size() ); + for ( size_t i = 0; i < m_Engine->m_StaticVobs.size(); i++ ) { + m_VobToGPUIndex[m_Engine->m_StaticVobs[i]->Vob] = static_cast(i); + } + + // --- 5. 
Upload to GPU --- + auto* device = m_Engine->GetDevice().Get(); + auto* context = m_Engine->GetContext().Get(); + + m_VobGPUBuffer = std::make_unique>(); + m_VobGPUBuffer->Init( device, static_cast(vobGPU.size()), false, false ); + m_VobGPUBuffer->UpdateBufferDefault( context, vobGPU.data(), static_cast(vobGPU.size()) ); + + m_SubmeshGPUBuffer = std::make_unique>(); + m_SubmeshGPUBuffer->Init( device, static_cast(submeshGPU.size()), false, false ); + m_SubmeshGPUBuffer->UpdateBufferDefault( context, submeshGPU.data(), static_cast(submeshGPU.size()) ); + + UINT instanceCapacity = std::max( m_TotalMaxInstances, 1u ); + m_InstanceBufferGPU = std::make_unique>(); + m_InstanceBufferGPU->Init( device, instanceCapacity, false, true ); + + UINT argsSize = static_cast(mergedArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); + m_MergedIndirectArgs = std::make_unique(); + m_MergedIndirectArgs->Init( + mergedArgs.data(), argsSize, + D3D11IndirectBuffer::B_UNORDERED_ACCESS, + D3D11IndirectBuffer::U_DEFAULT, + D3D11IndirectBuffer::CA_NONE ); + + m_MergedArgsReset = mergedArgs; + + D3D11_BUFFER_DESC templateDesc = {}; + templateDesc.ByteWidth = argsSize; + templateDesc.Usage = D3D11_USAGE_DEFAULT; + templateDesc.BindFlags = 0; + templateDesc.MiscFlags = D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS; + + D3D11_SUBRESOURCE_DATA templateData = {}; + templateData.pSysMem = mergedArgs.data(); + device->CreateBuffer( &templateDesc, &templateData, m_IndirectArgsTemplate.ReleaseAndGetAddressOf() ); + + CullConstants initCB = {}; + m_CullConstantBuffer = std::make_unique( sizeof( CullConstants ), &initCB ); + + if ( m_TotalMaxInstances > 0 ) { + std::vector instanceIds( m_TotalMaxInstances ); + for ( uint32_t i = 0; i < m_TotalMaxInstances; i++ ) + instanceIds[i] = i; + + m_GlobalInstanceIdBuffer = std::make_unique(); + m_GlobalInstanceIdBuffer->Init( + instanceIds.data(), + static_cast(instanceIds.size() * sizeof( uint32_t )), + D3D11VertexBuffer::B_VERTEXBUFFER, + 
D3D11VertexBuffer::U_IMMUTABLE,
+            D3D11VertexBuffer::CA_NONE );
+    }
+
+    LogInfo() << "VOB Atlas GPU Culling: " << vobGPU.size() << " vobs, "
+        << submeshGPU.size() << " submesh entries, "
+        << mergedArgs.size() << " indirect args, "
+        << m_TotalMaxInstances << " max instances";
+}
+
+// ============================================================
+// OnVobRemovedFromWorld – hide a removed vob without rebuild
+//
+// Looks the vob up in the pointer -> GPU-index map built by
+// BuildGPUCullingBuffers. Unknown vobs are ignored. For a known
+// vob the map entry is dropped, its submeshCount is zeroed in the
+// CPU mirror and only that one element is re-uploaded.
+// ============================================================
+void D3D11VobAtlasPass::OnVobRemovedFromWorld( zCVob* vob ) {
+    auto it = m_VobToGPUIndex.find( vob );
+    if ( it == m_VobToGPUIndex.end() )
+        return;
+
+    const UINT idx = it->second;
+    m_VobToGPUIndex.erase( it );
+
+    // Defensive: never index past the CPU mirror or touch a missing GPU buffer.
+    if ( idx >= m_VobGPUDataCPU.size() || !m_VobGPUBuffer )
+        return;
+
+    // Zero submeshCount so the CS never emits instances for this vob
+    m_VobGPUDataCPU[idx].submeshCount = 0;
+
+    // Upload only the modified element to the GPU via UpdateSubresource + D3D11_BOX.
+    // left/right are set to the byte range of element idx; the remaining
+    // extents are the fixed 0..1 used for this single-row update.
+    D3D11_BOX box = {};
+    box.left = idx * sizeof( VobGPUData );
+    box.right = box.left + sizeof( VobGPUData );
+    box.top = 0;
+    box.bottom = 1;
+    box.front = 0;
+    box.back = 1;
+    m_Engine->GetContext()->UpdateSubresource(
+        m_VobGPUBuffer->GetBuffer(), 0, &box,
+        &m_VobGPUDataCPU[idx], 0, 0 );
+}
+
+// ============================================================
+// Draw – per-frame GPU-cull + indirect draw
+//
+// frustum : culling planes copied into the cull constant buffer
+// bindPS  : when true, pixel shaders are bound for every atlas
+//           group and the Hi-Z pyramid is built if available
+// Returns XR_SUCCESS, also when the pass is not ready and
+// nothing is drawn.
+// ============================================================
+XRESULT D3D11VobAtlasPass::Draw( const Frustum& frustum, bool bindPS ) {
+    // Every resource dereferenced further down has to exist. The indirect-args
+    // template in particular comes from a CreateBuffer call whose result is not
+    // checked at build time, so it is verified here before being handed to
+    // CopyResource.
+    if ( m_AtlasDrawGroups.empty() || !m_VobGPUBuffer || !m_SubmeshGPUBuffer ||
+        !m_InstanceBufferGPU || !m_MergedIndirectArgs || !m_IndirectArgsTemplate ||
+        !m_CullConstantBuffer || !m_GlobalInstanceIdBuffer ||
+        !m_StaticGlobalVertexBuffer || !m_StaticGlobalIndexBuffer )
+        return XR_SUCCESS;
+
+    auto _ = m_Engine->RecordGraphicsEvent( L"DrawVOBsIndirect" );
+    auto& context = m_Engine->GetContext();
+
+    // --- 0. Build Hi-Z pyramid for occlusion culling (main pass only) ---
+    const bool useHiZ = bindPS && m_Engine->m_HiZTexture && m_Engine->m_HiZSRV;
+    if ( useHiZ ) {
+        m_Engine->CopyDepthStencil();
+        m_Engine->BuildHiZPyramid();
+    }
+
+    // --- 1. 
Reset indirect args InstanceCounts --- + context->CopyResource( m_MergedIndirectArgs->GetIndirectBuffer().Get(), + m_IndirectArgsTemplate.Get() ); + + // --- 2. Update cull constant buffer --- + CullConstants cb = {}; + memcpy( cb.frustumPlanes, frustum.GetPlanes().data(), 6 * sizeof( XMFLOAT4 ) ); + cb.cameraPosition = Engine::GAPI->GetCameraPosition(); + cb.drawDistance = Engine::GAPI->GetRendererState().RendererSettings.OutdoorVobDrawRadius; + cb.globalWindStrength = vobAnimation_WindStrength; + cb.windAdvanced = (Engine::GAPI->GetRendererState().RendererSettings.WindQuality + == GothicRendererSettings::EWindQuality::WIND_QUALITY_ADVANCED) ? 1 : 0; + cb.numVobs = static_cast(m_Engine->m_StaticVobs.size()); + cb.feedbackFrameNumber = 0; + + if ( useHiZ ) { + cb.enableHiZ = 1; + cb.hiZMipCount = m_Engine->m_HiZMipCount; + cb.hiZWidth = static_cast(m_Engine->DepthStencilBuffer->GetSizeX()); + cb.hiZHeight = static_cast(m_Engine->DepthStencilBuffer->GetSizeY()); + + XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); + auto& projF = Engine::GAPI->GetProjectionMatrix(); + XMStoreFloat4x4( &cb.viewProjection, XMMatrixMultiply( view, XMLoadFloat4x4( &projF ) ) ); + } else { + cb.enableHiZ = 0; + cb.hiZMipCount = 0; + cb.hiZWidth = 0.0f; + cb.hiZHeight = 0.0f; + XMStoreFloat4x4( &cb.viewProjection, XMMatrixIdentity() ); + } + + m_CullConstantBuffer->UpdateBuffer( &cb ); + m_CullConstantBuffer->BindToComputeShader( 0 ); + + // --- 3. 
Dispatch CS_CullVobs --- + auto cullCS = m_Engine->ShaderManager->GetCShader( CShaderID::CS_CullVobs ); + if ( !cullCS ) + return XR_SUCCESS; + cullCS->Apply(); + + ID3D11ShaderResourceView* srvs[2] = { + m_VobGPUBuffer->GetSRV(), + m_SubmeshGPUBuffer->GetSRV() + }; + context->CSSetShaderResources( 0, 2, srvs ); + + if ( useHiZ ) { + ID3D11ShaderResourceView* hiZSRV = m_Engine->m_HiZSRV.Get(); + context->CSSetShaderResources( 2, 1, &hiZSRV ); + } + + ID3D11UnorderedAccessView* uavs[2] = { + m_InstanceBufferGPU->GetUAV(), + m_MergedIndirectArgs->GetUnorderedAccessView().Get() + }; + context->CSSetUnorderedAccessViews( 0, 2, uavs, nullptr ); + + UINT numGroups = (static_cast(m_Engine->m_StaticVobs.size()) + 63) / 64; + context->Dispatch( numGroups, 1, 1 ); + + // Unbind CS resources + ID3D11ShaderResourceView* nullSRV[3] = { nullptr, nullptr, nullptr }; + ID3D11UnorderedAccessView* nullUAV[2] = { nullptr, nullptr }; + context->CSSetShaderResources( 0, 3, nullSRV ); + context->CSSetUnorderedAccessViews( 0, 2, nullUAV, nullptr ); + context->CSSetShader( nullptr, nullptr, 0 ); + + // --- 4. Bind global geometry --- + UINT strides[2] = { sizeof( ExVertexStruct ), sizeof( uint32_t ) }; + UINT offsets[2] = { 0, 0 }; + ID3D11Buffer* vbs[2] = { + m_StaticGlobalVertexBuffer->GetVertexBuffer().Get(), + m_GlobalInstanceIdBuffer->GetVertexBuffer().Get() + }; + context->IASetVertexBuffers( 0, 2, vbs, strides, offsets ); + context->IASetIndexBuffer( m_StaticGlobalIndexBuffer->GetVertexBuffer().Get(), + VERTEX_INDEX_DXGI_FORMAT, 0 ); + context->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); + + // --- 5. Bind instance structured buffer to VS t1 --- + ID3D11ShaderResourceView* instSRV = m_InstanceBufferGPU->GetSRV(); + context->VSSetShaderResources( 1, 1, &instSRV ); + + // --- 6. 
Set vertex shader --- + m_Engine->SetActiveVertexShader( VShaderID::VS_ExInstancedObjIndirectAtlas ); + m_Engine->SetupVS_ExMeshDrawCall(); + m_Engine->SetupVS_ExConstantBuffer(); + + VS_ExConstantBuffer_Wind windBuff{}; + m_Engine->ApplyWindProps( windBuff ); + m_Engine->ActiveVS->GetBuffer(1).Update( &windBuff ).Bind(); + + if ( bindPS ) + context->PSSetShaderResources( 4, 1, m_Engine->ReflectionCube.GetAddressOf() ); + + m_Engine->ActiveVS->Apply(); + + // --- 7. Draw per atlas group --- + MaterialInfo defMaterial{}; + GSky* sky = Engine::GAPI->GetSky(); + + context->PSSetShader( nullptr, nullptr, 0 ); + auto lastPs = PShaderID::COUNT; + GraphicsShaderConstantBuffer buffersToBind[3] = {}; + + const PShaderID alphaTestPS = m_Engine->GetRenderingStage() == D3D11ENGINE_RENDER_STAGE::DES_SHADOWMAP + ? PShaderID::PS_DiffuseAtlasAlphaTestShadows + : PShaderID::PS_DiffuseAtlasAlphaTest; + + for ( auto& group : m_AtlasDrawGroups ) { + ID3D11ShaderResourceView* srv = m_TextureAtlasses[group.format].atlasSRV; + if ( !srv ) + continue; + + const bool needsPS = bindPS || (group.format == DXGI_FORMAT_BC2_UNORM); + + if ( needsPS ) { + context->PSSetShaderResources( 0, 1, &srv ); + + auto newPs = (bindPS && group.format != DXGI_FORMAT_BC2_UNORM) + ? 
PShaderID::PS_DiffuseAtlas + : alphaTestPS; + + if ( newPs != lastPs ) { + m_Engine->SetActivePixelShader( newPs ); + m_Engine->ActivePS->Apply(); + + buffersToBind[0] = m_Engine->ActivePS->GetBuffer( 0 ).Bind(); + buffersToBind[1] = m_Engine->ActivePS->GetBuffer( 1 ).Bind(); + buffersToBind[2] = m_Engine->ActivePS->GetBuffer( 2 ).Bind(); + m_Engine->OutdoorVobsConstantBuffer->BindToPixelShader( 3 ); + + lastPs = newPs; + } + + buffersToBind[0].Update( &Engine::GAPI->GetRendererState().GraphicsState ); + buffersToBind[1].Update( &sky->GetAtmosphereCB() ); + buffersToBind[2].Update( &defMaterial.buffer ); + } else if ( lastPs != PShaderID::COUNT ) { + context->PSSetShader( nullptr, nullptr, 0 ); + lastPs = PShaderID::COUNT; + } + + DrawMultiIndexedInstancedIndirect( + context.Get(), + group.mergedArgsCount, + m_MergedIndirectArgs->GetIndirectBuffer().Get(), + group.mergedArgsOffset, + sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ) ); + } + + // Unbind instance buffer from VS + ID3D11ShaderResourceView* nullVSSRV = nullptr; + context->VSSetShaderResources( 1, 1, &nullVSSRV ); + + return XR_SUCCESS; +} diff --git a/D3D11Engine/D3D11VobAtlasPass.h b/D3D11Engine/D3D11VobAtlasPass.h new file mode 100644 index 00000000..05a29abe --- /dev/null +++ b/D3D11Engine/D3D11VobAtlasPass.h @@ -0,0 +1,88 @@ +#pragma once +#include "D3D11AtlasTypes.h" +#include "D3D11StructuredBuffer.h" +#include "D3D11VertexBuffer.h" +#include "D3D11ConstantBuffer.h" +#include "VobCulling.h" + +#include +#include +#include +#include +#include + +class D3D11GraphicsEngine; +class Frustum; +class zCTexture; + +/** + * Encapsulates all texture-atlas-based GPU-driven rendering for static VOBs. 
+ * + * Responsibilities: + * - Building per-format Texture2DArray atlases from static-VOB diffuse textures + * - Building the merged global VB/IB and per-submesh indirect-args buffer + * - Building the GPU structured buffers used by CS_CullVobs + * - Executing the GPU-culling compute pass and the subsequent indirect draw + * + * The engine keeps one instance of this class. Call Build() when a new world + * is loaded (OnWorldLoaded), and Draw() every frame in place of the old + * DrawVOBsIndirect(). + */ +class D3D11VobAtlasPass { + friend class D3D11GraphicsEngine; +public: + explicit D3D11VobAtlasPass( D3D11GraphicsEngine* engine ); + + /** (Re-)build atlases, geometry buffers, and GPU culling buffers. + * Called from D3D11GraphicsEngine::OnWorldLoaded(). */ + void Build(); + + /** GPU-cull static VOBs and draw them with indirect multi-draw. + * bindPS=false is used in shadow passes to skip the pixel shader. */ + XRESULT Draw( const Frustum& frustum, bool bindPS = true ); + + /** Mark a removed vob as invisible in the GPU buffer without rebuilding. */ + void OnVobRemovedFromWorld( class zCVob* vob ); + + /** True once Build() has completed and at least one draw group exists. */ + bool IsReady() const { return !m_AtlasDrawGroups.empty(); } + + /** Atlas lookup (read-only access for other systems if needed). 
*/ + const std::unordered_map& GetAtlasLookup() const { + return m_TextureAtlasLookup; + } + +private: + D3D11GraphicsEngine* m_Engine; + + // ---- Atlas textures ---- + std::array m_TextureAtlasses{}; + std::unordered_map m_TextureAtlasLookup; + + // ---- Global geometry ---- + std::unique_ptr m_StaticGlobalVertexBuffer; + std::unique_ptr m_StaticGlobalIndexBuffer; + std::unique_ptr m_GlobalInstanceIdBuffer; + std::vector m_AtlasDrawGroups; + + // (legacy slot – not yet used but reserved for future streaming) + std::unique_ptr> m_StaticVobInstanceBuffer; + + // ---- GPU culling buffers ---- + std::unique_ptr> m_VobGPUBuffer; + std::unique_ptr> m_SubmeshGPUBuffer; + std::unique_ptr> m_InstanceBufferGPU; + std::unique_ptr m_MergedIndirectArgs; + Microsoft::WRL::ComPtr m_IndirectArgsTemplate; + std::unique_ptr m_CullConstantBuffer; + std::vector m_MergedArgsReset; + UINT m_TotalMaxInstances = 0; + + // CPU-side cache for targeted per-vob GPU updates on removal + std::vector m_VobGPUDataCPU; + std::unordered_map m_VobToGPUIndex; + + void BuildTextureAtlasses(); + void BuildGeometryBuffers(); + void BuildGPUCullingBuffers(); +}; diff --git a/D3D11Engine/D3D7/MyDirect3DDevice7.h b/D3D11Engine/D3D7/MyDirect3DDevice7.h index 874cab0b..ba75e5eb 100644 --- a/D3D11Engine/D3D7/MyDirect3DDevice7.h +++ b/D3D11Engine/D3D7/MyDirect3DDevice7.h @@ -246,15 +246,15 @@ class MyDirect3DDevice7 : public IDirect3DDevice7 { } break; - case D3DRENDERSTATE_ZENABLE: state.DepthState.DepthBufferEnabled = Value != 0; state.DepthState.SetDirty(); break; + case D3DRENDERSTATE_ZENABLE: state.DepthState.DepthBufferEnabled = Value != 0; break; case D3DRENDERSTATE_ALPHATESTENABLE: state.GraphicsState.SetGraphicsSwitch( GSWITCH_ALPHAREF, Value != 0 ); break; - case D3DRENDERSTATE_SRCBLEND: state.BlendState.SrcBlend = static_cast(Value); state.BlendState.SetDirty(); break; - case D3DRENDERSTATE_DESTBLEND: state.BlendState.DestBlend = static_cast(Value); state.BlendState.SetDirty(); break; - //case 
D3DRENDERSTATE_CULLMODE: state.RasterizerState.CullMode = static_cast(Value); state.RasterizerState.SetDirty(); break; - case D3DRENDERSTATE_ZFUNC: state.DepthState.DepthBufferCompareFunc = static_cast(Value); state.DepthState.SetDirty(); break; + case D3DRENDERSTATE_SRCBLEND: state.BlendState.SrcBlend = static_cast(Value); break; + case D3DRENDERSTATE_DESTBLEND: state.BlendState.DestBlend = static_cast(Value); break; + //case D3DRENDERSTATE_CULLMODE: state.RasterizerState.CullMode = static_cast(Value); break; + case D3DRENDERSTATE_ZFUNC: state.DepthState.DepthBufferCompareFunc = static_cast(Value); break; case D3DRENDERSTATE_ALPHAREF: state.GraphicsState.FF_AlphaRef = static_cast(Value) / 255.0f; break; // Ref for masked - case D3DRENDERSTATE_ALPHABLENDENABLE: state.BlendState.BlendEnabled = Value != 0; state.BlendState.SetDirty(); break; - case D3DRENDERSTATE_ZBIAS: state.RasterizerState.ZBias = Value; state.DepthState.SetDirty(); break; + case D3DRENDERSTATE_ALPHABLENDENABLE: state.BlendState.BlendEnabled = Value != 0; break; + case D3DRENDERSTATE_ZBIAS: state.RasterizerState.ZBias = Value; break; case D3DRENDERSTATE_TEXTUREFACTOR: state.GraphicsState.FF_TextureFactor = float4( Value ); break; case D3DRENDERSTATE_LIGHTING: state.GraphicsState.SetGraphicsSwitch( GSWITCH_LIGHING, Value != 0 ); break; } @@ -346,15 +346,12 @@ class MyDirect3DDevice7 : public IDirect3DDevice7 { case D3DTSS_ADDRESS: state.SamplerState.AddressU = static_cast(Value); state.SamplerState.AddressV = static_cast(Value); - state.SamplerState.SetDirty(); break; case D3DTSS_ADDRESSU: state.SamplerState.AddressU = static_cast(Value); - state.SamplerState.SetDirty(); break; case D3DTSS_ADDRESSV: state.SamplerState.AddressV = static_cast(Value); - state.SamplerState.SetDirty(); break; case D3DTSS_BORDERCOLOR: break; @@ -509,7 +506,6 @@ class MyDirect3DDevice7 : public IDirect3DDevice7 { // Gothic wants that for the sky Engine::GAPI->GetRendererState().RasterizerState.FrontCounterClockwise = true; 
- Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GraphicsEngine->SetActiveVertexShader( VShaderID::VS_TransformedEx ); Engine::GraphicsEngine->BindViewportInformation( VShaderID::VS_TransformedEx, 0 ); break; @@ -573,7 +569,6 @@ class MyDirect3DDevice7 : public IDirect3DDevice7 { // Gothic wants that for the sky Engine::GAPI->GetRendererState().RasterizerState.FrontCounterClockwise = true; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GraphicsEngine->DrawVertexBufferFF( static_cast(lpd3dVertexBuffer)->GetVertexBuffer(), dwNumVertices, dwStartVertex, sizeof( Gothic_XYZRHW_DIF_T1_Vertex ) ); break; diff --git a/D3D11Engine/EditorLinePrimitive.cpp b/D3D11Engine/EditorLinePrimitive.cpp index df0fb872..660327e4 100644 --- a/D3D11Engine/EditorLinePrimitive.cpp +++ b/D3D11Engine/EditorLinePrimitive.cpp @@ -857,7 +857,6 @@ void EditorLinePrimitive::RenderVertexBuffer( const Microsoft::WRL::ComPtrSetDefaultStates(); Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); engine->UpdateRenderStates(); Shader->Apply(); diff --git a/D3D11Engine/Frustum.h b/D3D11Engine/Frustum.h index 6834d571..6dc86ae3 100644 --- a/D3D11Engine/Frustum.h +++ b/D3D11Engine/Frustum.h @@ -59,6 +59,9 @@ class Frustum // Transform correctly to World Space viewSpaceFrustum.Transform( m_orientedBox, invView ); + + CacheOBBPlanes(); + m_useBoundingOrientedBox = true; m_useSphere = false; m_always_containing = false; @@ -72,7 +75,7 @@ class Frustum f.isValid = true; return f; } - + bool SupportsCulling() const { return !m_always_containing; } // Für perspektivische Projektion (normale Kamera) @@ -117,23 +120,63 @@ class Frustum if (m_useSphere) { return m_boundingSphere.Intersects(aabb); } - if (m_useBoundingOrientedBox) { - return m_orientedBox.Intersects(aabb); + + const float cx = aabb.Center.x; + const float cy = aabb.Center.y; + const float cz = aabb.Center.z; + const float ex = 
aabb.Extents.x; + const float ey = aabb.Extents.y; + const float ez = aabb.Extents.z; + + for ( int i = 0; i < 6; ++i ) { + const float nx = m_cachedPlanes[i].x; + const float ny = m_cachedPlanes[i].y; + const float nz = m_cachedPlanes[i].z; + const float w = m_cachedPlanes[i].w; + + // Distance from the AABB center to the plane + const float dist = nx * cx + ny * cy + nz * cz + w; + + // Projected radius of the AABB onto the plane's normal + const float projRadius = ex * std::abs( nx ) + ey * std::abs( ny ) + ez * std::abs( nz ); + + // If the center is further outside the plane than its projected radius, + // the entire box is disjoint. We can early-out immediately. + if ( dist > projRadius ) { + return false; + } } - return m_frustum.Intersects(aabb); + + // If no separating plane was found, it must be intersecting or contained. + return true; } // Schneller Sphere-Test für VOBs - bool Intersects(const BoundingSphere& sphere) const { - if (m_always_containing) return true; + bool Intersects( const BoundingSphere& sphere ) const { + if ( m_always_containing ) return true; - if (m_useSphere) { - return m_boundingSphere.Intersects(sphere); + if ( m_useSphere ) { + return m_boundingSphere.Intersects( sphere ); } - if (m_useBoundingOrientedBox) { - return m_orientedBox.Intersects(sphere); + + const float cx = sphere.Center.x; + const float cy = sphere.Center.y; + const float cz = sphere.Center.z; + const float r = sphere.Radius; + + // Scalar early-out loop. + // For outward-facing planes, if distance > radius, it is completely outside. 
+ for ( int i = 0; i < 6; ++i ) { + const float dist = m_cachedPlanes[i].x * cx + + m_cachedPlanes[i].y * cy + + m_cachedPlanes[i].z * cz + + m_cachedPlanes[i].w; + if ( dist > r ) { + return false; + } } - return m_frustum.Intersects(sphere); + + return true; } // Schneller AABB-Test @@ -142,22 +185,52 @@ class Frustum if (m_useSphere) { return m_boundingSphere.Contains(aabb); } - if (m_useBoundingOrientedBox) { - return m_orientedBox.Contains(aabb); + + const float cx = aabb.Center.x; + const float cy = aabb.Center.y; + const float cz = aabb.Center.z; + const float ex = aabb.Extents.x; + const float ey = aabb.Extents.y; + const float ez = aabb.Extents.z; + + bool intersects = false; + + for ( int i = 0; i < 6; ++i ) { + const float nx = m_cachedPlanes[i].x; + const float ny = m_cachedPlanes[i].y; + const float nz = m_cachedPlanes[i].z; + const float w = m_cachedPlanes[i].w; + + // 1. Calculate distance from the AABB center to the plane + const float dist = nx * cx + ny * cy + nz * cz + w; + + // 2. Calculate the projected radius of the AABB onto the plane's normal + const float projRadius = ex * std::abs( nx ) + ey * std::abs( ny ) + ez * std::abs( nz ); + + // 3. Since planes are OUTWARD facing: + if ( dist > projRadius ) { + return DirectX::ContainmentType::DISJOINT; // Completely outside + } + if ( dist > -projRadius ) { + intersects = true; // Partially inside, keep checking the other planes + } } - return aabb.ContainedBy( - XMLoadFloat4(&m_cachedPlanes[0]), - XMLoadFloat4(&m_cachedPlanes[1]), - XMLoadFloat4(&m_cachedPlanes[2]), - XMLoadFloat4(&m_cachedPlanes[3]), - XMLoadFloat4(&m_cachedPlanes[4]), - XMLoadFloat4(&m_cachedPlanes[5]) - ); + + return intersects ? 
DirectX::ContainmentType::INTERSECTS : DirectX::ContainmentType::CONTAINS; } bool Intersects( const zTBBox3D& aabb ) const { if ( m_always_containing ) return true; - return Intersects( BBoxFromzTBBox3D( aabb ) ); + // Fast scalar conversion - avoids memory->SIMD->memory roundtrip + BoundingBox bb; + bb.Center.x = (aabb.Min.x + aabb.Max.x) * 0.5f; + bb.Center.y = (aabb.Min.y + aabb.Max.y) * 0.5f; + bb.Center.z = (aabb.Min.z + aabb.Max.z) * 0.5f; + bb.Extents.x = (aabb.Max.x - aabb.Min.x) * 0.5f; + bb.Extents.y = (aabb.Max.y - aabb.Min.y) * 0.5f; + bb.Extents.z = (aabb.Max.z - aabb.Min.z) * 0.5f; + + return Intersects( bb ); } DirectX::ContainmentType Contains(const zTBBox3D& aabb) const { @@ -192,12 +265,14 @@ class Frustum return Contains(bb); } - static BoundingBox BBoxFromzTBBox3D(const zTBBox3D& box) { + static BoundingBox BBoxFromzTBBox3D(const zTBBox3D& aabb) { BoundingBox bb; - XMVECTOR bbMin = XMLoadFloat3(&box.Min); - XMVECTOR bbMax = XMLoadFloat3(&box.Max); - XMStoreFloat3(&bb.Center, XMVectorScale(XMVectorAdd(bbMin, bbMax), 0.5f)); - XMStoreFloat3(&bb.Extents, XMVectorScale(XMVectorSubtract(bbMax, bbMin), 0.5f)); + bb.Center.x = (aabb.Min.x + aabb.Max.x) * 0.5f; + bb.Center.y = (aabb.Min.y + aabb.Max.y) * 0.5f; + bb.Center.z = (aabb.Min.z + aabb.Max.z) * 0.5f; + bb.Extents.x = (aabb.Max.x - aabb.Min.x) * 0.5f; + bb.Extents.y = (aabb.Max.y - aabb.Min.y) * 0.5f; + bb.Extents.z = (aabb.Max.z - aabb.Min.z) * 0.5f; return bb; } @@ -231,29 +306,29 @@ class Frustum XMVECTOR vOrigin = XMLoadFloat3(&m_frustum.Origin); XMVECTOR vOrientation = XMLoadFloat4(&m_frustum.Orientation); - // Left plane - XMVECTOR plane = XMVectorSet(-1.0f, 0.0f, m_frustum.LeftSlope, 0.0f); + // Near plane + XMVECTOR plane = XMVectorSet(0.0f, 0.0f, -1.0f, m_frustum.Near); plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); XMStoreFloat4(&m_cachedPlanes[0], XMPlaneNormalize(plane)); + // Left plane + plane = XMVectorSet(-1.0f, 0.0f, m_frustum.LeftSlope, 0.0f); + 
plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); + XMStoreFloat4(&m_cachedPlanes[1], XMPlaneNormalize(plane)); + // Right plane plane = XMVectorSet(1.0f, 0.0f, -m_frustum.RightSlope, 0.0f); plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); - XMStoreFloat4(&m_cachedPlanes[1], XMPlaneNormalize(plane)); + XMStoreFloat4(&m_cachedPlanes[2], XMPlaneNormalize(plane)); // Bottom plane plane = XMVectorSet(0.0f, -1.0f, m_frustum.BottomSlope, 0.0f); plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); - XMStoreFloat4(&m_cachedPlanes[2], XMPlaneNormalize(plane)); + XMStoreFloat4(&m_cachedPlanes[3], XMPlaneNormalize(plane)); // Top plane plane = XMVectorSet(0.0f, 1.0f, -m_frustum.TopSlope, 0.0f); plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); - XMStoreFloat4(&m_cachedPlanes[3], XMPlaneNormalize(plane)); - - // Near plane - plane = XMVectorSet(0.0f, 0.0f, -1.0f, m_frustum.Near); - plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); XMStoreFloat4(&m_cachedPlanes[4], XMPlaneNormalize(plane)); // Far plane @@ -261,6 +336,47 @@ class Frustum plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); XMStoreFloat4(&m_cachedPlanes[5], XMPlaneNormalize(plane)); } + + // Cache world-space planes from an Oriented Bounding Box (Directional Light / Ortho) +// Plane order: [0]=Near, [1]=Left, [2]=Right, [3]=Bottom, [4]=Top, [5]=Far +void CacheOBBPlanes() { + XMVECTOR C = XMLoadFloat3(&m_orientedBox.Center); + XMVECTOR E = XMLoadFloat3(&m_orientedBox.Extents); + XMVECTOR Q = XMLoadFloat4(&m_orientedBox.Orientation); + + XMMATRIX R = XMMatrixRotationQuaternion(Q); + XMVECTOR AxisX = R.r[0]; + XMVECTOR AxisY = R.r[1]; + XMVECTOR AxisZ = R.r[2]; + + XMVECTOR Ex = XMVectorSplatX(E); + XMVECTOR Ey = XMVectorSplatY(E); + XMVECTOR Ez = XMVectorSplatZ(E); + + // Near face: Min Z boundary. 
Outward normal is -AxisZ + XMVECTOR P_Near = XMVectorSubtract( C, XMVectorMultiply( AxisZ, Ez ) ); + XMStoreFloat4( &m_cachedPlanes[0], XMPlaneFromPointNormal( P_Near, XMVectorNegate( AxisZ ) ) ); + + // Left face: Min X boundary. Outward normal is -AxisX + XMVECTOR P_Left = XMVectorSubtract( C, XMVectorMultiply( AxisX, Ex ) ); + XMStoreFloat4( &m_cachedPlanes[1], XMPlaneFromPointNormal( P_Left, XMVectorNegate( AxisX ) ) ); + + // Right face: Max X boundary. Outward normal is +AxisX + XMVECTOR P_Right = XMVectorAdd( C, XMVectorMultiply( AxisX, Ex ) ); + XMStoreFloat4( &m_cachedPlanes[2], XMPlaneFromPointNormal( P_Right, AxisX ) ); + + // Bottom face: Min Y boundary. Outward normal is -AxisY + XMVECTOR P_Bottom = XMVectorSubtract( C, XMVectorMultiply( AxisY, Ey ) ); + XMStoreFloat4( &m_cachedPlanes[3], XMPlaneFromPointNormal( P_Bottom, XMVectorNegate( AxisY ) ) ); + + // Top face: Max Y boundary. Outward normal is +AxisY + XMVECTOR P_Top = XMVectorAdd( C, XMVectorMultiply( AxisY, Ey ) ); + XMStoreFloat4( &m_cachedPlanes[4], XMPlaneFromPointNormal( P_Top, AxisY ) ); + + // Far face: Max Z boundary. 
Outward normal is +AxisZ + XMVECTOR P_Far = XMVectorAdd( C, XMVectorMultiply( AxisZ, Ez ) ); + XMStoreFloat4( &m_cachedPlanes[5], XMPlaneFromPointNormal( P_Far, AxisZ ) ); +} private: // Helper to get frustum corners for AABB creation @@ -274,7 +390,7 @@ class Frustum BoundingSphere m_boundingSphere; BoundingOrientedBox m_orientedBox; - std::array m_cachedPlanes{}; // [0]=Left, [1]=Right, [2]=Bottom, [3]=Top, [4]=Near, [5]=Far + std::array m_cachedPlanes{}; bool m_useSphere = false; bool m_useBoundingOrientedBox = false; bool m_always_containing = false; diff --git a/D3D11Engine/GVegetationBox.cpp b/D3D11Engine/GVegetationBox.cpp index 8d6eb97b..523d51e5 100644 --- a/D3D11Engine/GVegetationBox.cpp +++ b/D3D11Engine/GVegetationBox.cpp @@ -309,7 +309,6 @@ void GVegetationBox::RenderVegetation( const XMFLOAT3& eye ) { VegetationTexture->BindToPixelShader( 1 ); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); // Enable alpha-to-coverage @@ -317,7 +316,6 @@ void GVegetationBox::RenderVegetation( const XMFLOAT3& eye ) { Engine::GAPI->GetRendererState().BlendState.SetDefault(); Engine::GAPI->GetRendererState().BlendState.BlendEnabled = false; Engine::GAPI->GetRendererState().BlendState.AlphaToCoverage = Engine::GAPI->GetRendererState().RendererSettings.VegetationAlphaToCoverage; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); } Engine::GraphicsEngine->SetActiveVertexShader( VShaderID::VS_GrassInstanced ); @@ -356,7 +354,6 @@ void GVegetationBox::RenderVegetation( const XMFLOAT3& eye ) { if ( Engine::GAPI->GetRendererState().RendererSettings.VegetationAlphaToCoverage ) { Engine::GAPI->GetRendererState().BlendState.SetDefault(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); } } diff --git a/D3D11Engine/GothicAPI.cpp b/D3D11Engine/GothicAPI.cpp index c19cbf4d..06dad5dc 100644 --- a/D3D11Engine/GothicAPI.cpp +++ b/D3D11Engine/GothicAPI.cpp 
@@ -47,6 +47,7 @@ // TODO: REMOVE THIS! #include "D3D11GraphicsEngine.h" +#include "D3D11TextureAtlasManager.h" #ifndef PUBLIC_RELEASE #define OPT_DBG_NOINLINE __declspec(noinline) @@ -789,11 +790,11 @@ void GothicAPI::ResetVobs() { AnimatedSkeletalVobs.clear(); // Delete light vobs - for ( auto const& it : VobLightMap ) { + for ( auto const& it : VobLights_Sorted ) { Engine::GraphicsEngine->OnVobRemovedFromWorld( it.first ); delete it.second; } - VobLightMap.clear(); + VobLights_Sorted.clear(); } /** Called when the game loaded a new level */ @@ -877,12 +878,11 @@ void GothicAPI::OnWorldLoaded() { zCTree* vobTree = oCGame::GetGame()->_zCSession_world->GetGlobalVobTree(); TraverseVobTree( vobTree ); - // Build instancing cache for the static vobs for each section - BuildStaticMeshInstancingCache(); - // Build vob info cache for the bsp-leafs BuildBspVobMapCache(); + // Build instancing cache for the static vobs for each section + BuildStaticMeshInstancingCache(); #ifdef BUILD_GOTHIC_1_08k if ( LoadedWorldInfo->CustomWorldLoaded ) { CreatezCPolygonsForSections(); @@ -925,6 +925,7 @@ void GothicAPI::OnWorldLoaded() { #endif _canClearVobsByVisual = false; + Engine::GraphicsEngine->OnWorldLoaded(); } void GothicAPI::LoadRendererWorldSettings( GothicRendererSettings& s ) @@ -1266,7 +1267,6 @@ void GothicAPI::DrawWorldMeshNaive() { // Set up frustum for the camera RendererState.RasterizerState.SetDefault(); - RendererState.RasterizerState.SetDirty(); zCCamera::GetCamera()->Activate(); auto drawRadius = RendererState.RendererSettings.SkeletalMeshDrawRadius; @@ -1947,7 +1947,7 @@ void GothicAPI::OnRemovedVob( zCVob* vob, zCWorld* world ) { SkeletalVobInfo* svi = SkeletalVobMap[vob]; // Tell all dynamic lights that we removed a vob they could have cached - for ( auto& vlit : VobLightMap ) { + for ( auto& vlit : VobLights_Sorted ) { if ( vi && vlit.second->LightShadowBuffers ) vlit.second->LightShadowBuffers->OnVobRemovedFromWorld( vi ); @@ -1955,7 +1955,12 @@ void 
GothicAPI::OnRemovedVob( zCVob* vob, zCWorld* world ) { vlit.second->LightShadowBuffers->OnVobRemovedFromWorld( svi ); } - VobLightInfo* li = VobLightMap[static_cast(vob)]; + VobLightInfo* li = nullptr; + { + auto lit = VobLights_Sorted.find( static_cast(vob) ); + if ( lit != VobLights_Sorted.end() ) + li = lit->second; + } // Erase it from the particle-effect list auto pit = std::find( ParticleEffectVobs.begin(), ParticleEffectVobs.end(), vob ); @@ -1971,7 +1976,7 @@ void GothicAPI::OnRemovedVob( zCVob* vob, zCWorld* world ) { } // Erase it from the list of lights - VobLightMap.erase( static_cast(vob) ); + VobLights_Sorted.erase( static_cast(vob) ); // Remove from BSP-Cache std::vector* nodes = nullptr; @@ -2873,11 +2878,8 @@ void GothicAPI::DrawTransparencyVobs() { if ( !TransparencyVobs.empty() ) { // Setup alpha blending RendererState.RasterizerState.SetDefault(); - RendererState.RasterizerState.SetDirty(); RendererState.BlendState.SetAlphaBlending(); - RendererState.BlendState.SetDirty(); RendererState.DepthState.SetDefault(); - RendererState.DepthState.SetDirty(); } auto psBufGAI = g->GetShaderManager().GetPShader( PShaderID::PS_Transparency )->GetBuffer( "GhostAlphaInfo" ); @@ -2962,11 +2964,8 @@ void GothicAPI::DrawSkeletalVN() { SkeletalVobInfo* vi = VNSkeletalVobs.back(); RendererState.RasterizerState.SetDefault(); - RendererState.RasterizerState.SetDirty(); RendererState.BlendState.SetAlphaBlending(); - RendererState.BlendState.SetDirty(); RendererState.DepthState.SetDefault(); - RendererState.DepthState.SetDirty(); D3D11GraphicsEngine* g = reinterpret_cast(Engine::GraphicsEngine); @@ -3886,7 +3885,7 @@ void GothicAPI::CollectVisibleVobs( ctx.drawDistances.OutdoorVobsSmall = RendererState.RendererSettings.OutdoorSmallVobDrawRadius; ctx.drawDistances.IndoorVobs = RendererState.RendererSettings.IndoorVobDrawRadius; ctx.drawDistances.VisualFX = RendererState.RendererSettings.VisualFXDrawRadius; - CollectVisibleVobs( ctx ); + CollectVisibleVobs( ctx, 
collectFlags ); if ( RendererState.RendererSettings.SortRenderQueue ) { struct SortableVob { @@ -3929,6 +3928,7 @@ void GothicAPI::CollectVisibleVobs( // they should be unique at this point. if ( collectFlags & COLLECT_MUTATE ) { + for ( auto it : renderQueue.vobs ) { VobInstanceInfo vii = {}; vii.world = it->WorldMatrix; @@ -4125,14 +4125,14 @@ std::vector::iterator GothicAPI::MoveVobFromBspToDynamic( VobInfo* vob static void CVVH_AddNotDrawnVobToList( std::vector& source, - float dist, + float distSq, const RndCullContext& ctx, DirectX::ContainmentType bspContainment, BspTreeVobVisitor* visitor ) { const auto camPos = XMLoadFloat3( &ctx.cameraPosition ); - auto cullingEnabled = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.Culling.CullVobs; - auto distSq = dist * dist; + auto cullingEnabled = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.Culling.CullVobs + && ctx.frustum.SupportsCulling(); for ( auto const& it : source ) { if ( it->VisibleInRenderPass ) continue; @@ -4159,13 +4159,14 @@ static void CVVH_AddNotDrawnVobToList( static void CVVH_AddNotDrawnVobToList( std::vector& source, - float dist, const RndCullContext& ctx, + float distSq, const RndCullContext& ctx, DirectX::ContainmentType bspContainment, BspTreeVobVisitor* visitor) { const auto camPos = XMLoadFloat3( &ctx.cameraPosition ); - auto cullingEnabled = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.Culling.CullVobs; - auto vDistSq = XMVectorReplicate( dist * dist ); + auto cullingEnabled = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.Culling.CullVobs + && ctx.frustum.SupportsCulling(); + auto vDistSq = XMVectorReplicate( distSq ); for ( auto const& it : source ) { if ( it->VisibleInRenderPass ) continue; @@ -4254,12 +4255,12 @@ void GothicAPI::BuildBspVobMapCacheHelper( zCBspBase* base ) { for ( int i = 0; i < leaf->LightVobList.NumInArray; i++ ) { zCVobLight* vob = leaf->LightVobList.Array[i]; - // Add the light to the map if 
not already done - auto vit = VobLightMap.find( vob ); - if ( vit == VobLightMap.end() ) { + // Add the light to the sorted vector if not already done + auto [vit, inserted] = VobLights_Sorted.insert( vob, nullptr ); + if ( inserted ) { VobLightInfo* vi = new VobLightInfo; vi->Vob = vob; - VobLightMap[vob] = vi; + vit->second = vi; float minDynamicUpdateLightRange = Engine::GAPI->GetRendererState().RendererSettings.MinLightShadowUpdateRange; if ( RendererState.RendererSettings.EnablePointlightShadows >= GothicRendererSettings::PLS_STATIC_ONLY @@ -4454,7 +4455,7 @@ void GothicAPI::ResetVobFrameStats( ) { for ( auto&& it : VobMap ) { it.second->VisibleInRenderPass = false; } - for ( auto&& it : VobLightMap ) { + for ( auto&& it : VobLights_Sorted ) { it.second->VisibleInRenderPass = false; it.second->VisibleInFrame = false; } @@ -5605,23 +5606,27 @@ static void CollectVisibleVobsHelper( BspInfo* base, const RndCullContext& ctx, BspTreeVobVisitor* visitor, DirectX::ContainmentType inheritedContainment, - float yMaxWorld + float yMaxWorld, + EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_NO_MUTATE ) { - const float vobIndoorDist = ctx.drawDistances.IndoorVobs; - const float vobOutdoorDist = ctx.drawDistances.OutdoorVobs; - const float vobOutdoorSmallDist = ctx.drawDistances.OutdoorVobsSmall; + const float vobIndoorDistSq = ctx.drawDistances.IndoorVobs * ctx.drawDistances.IndoorVobs; + const float vobOutdoorDistSq = ctx.drawDistances.OutdoorVobs * ctx.drawDistances.OutdoorVobs; + const float vobOutdoorSmallDistSq = ctx.drawDistances.OutdoorVobsSmall * ctx.drawDistances.OutdoorVobsSmall; + const float visualFXDrawRadius = ctx.drawDistances.VisualFX; + const float visualFXDrawRadiusSq = ctx.drawDistances.VisualFX * ctx.drawDistances.VisualFX; + const XMFLOAT3 camPos = ctx.cameraPosition; const FXMVECTOR cameraPosition = XMLoadFloat3( &camPos ); - EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_NO_MUTATE; int clipFlags = 
EGothicCullFlags::CullSidesNear; if ( ctx.stage == RenderStage::STAGE_DRAW_SHADOWS ) { - collectFlags = EBspTreeCollectFlags::COLLECT_VOBS; clipFlags = EGothicCullFlags::CullSidesNear; } + const bool checkDist = (collectFlags & COLLECT_DISABLE_CHECK_DIST) == 0; + const auto& RendererState = Engine::GAPI->GetRendererState(); - auto& VobLightMap = Engine::GAPI->VobLightMap; + auto& VobLights = Engine::GAPI->VobLights_Sorted; while ( base->OriginalNode ) { // Check for occlusion-culling if ( RendererState.RendererSettings.EnableOcclusionCulling && !base->OcclusionInfo.VisibleLastFrame ) { @@ -5633,9 +5638,11 @@ static void CollectVisibleVobsHelper( BspInfo* base, nodeYMax = std::max( nodeYMax, base->OriginalNode->BBox3D.Max.y ); nodeBox.Max.y = nodeYMax; - float dist = Toolbox::ComputePointAABBDistance( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ); + const float distSq = checkDist + ? Toolbox::ComputePointAABBDistanceSq( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ) + : 0; ContainmentType clipResult = inheritedContainment; - if ( dist < vobOutdoorDist ) { + if ( distSq < vobOutdoorDistSq ) { if ( !RendererState.RendererSettings.EnableOcclusionCulling ) { if ( clipResult != ContainmentType::CONTAINS ) { clipResult = ctx.frustum.Contains( Frustum::BBoxFromzTBBox3D( nodeBox ) ); @@ -5674,37 +5681,40 @@ static void CollectVisibleVobsHelper( BspInfo* base, std::vector& listC = base->Vobs; std::vector& listD = base->Mobs; - const float dist = Toolbox::ComputePointAABBDistance( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ); + const float distSq = checkDist + ? 
Toolbox::ComputePointAABBDistanceSq( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ) + : 0; if ( collectFlags & COLLECT_VOBS && RendererState.RendererSettings.DrawVOBs ) { - if ( collectFlags & COLLECT_INDOOR_VOBS && dist < vobIndoorDist ) { - CVVH_AddNotDrawnVobToList( listA, vobIndoorDist, ctx, clipResult, visitor ); + if ( collectFlags & COLLECT_INDOOR_VOBS && distSq < vobIndoorDistSq ) { + CVVH_AddNotDrawnVobToList( listA, vobIndoorDistSq, ctx, clipResult, visitor ); } - if ( dist < vobOutdoorSmallDist ) { - CVVH_AddNotDrawnVobToList( listB, vobOutdoorSmallDist, ctx, clipResult, visitor ); + if ( distSq < vobOutdoorSmallDistSq ) { + CVVH_AddNotDrawnVobToList( listB, vobOutdoorSmallDistSq, ctx, clipResult, visitor ); } - if ( dist < vobOutdoorDist ) { - CVVH_AddNotDrawnVobToList( listC, vobOutdoorDist, ctx, clipResult, visitor ); + if ( distSq < vobOutdoorDistSq ) { + CVVH_AddNotDrawnVobToList( listC, vobOutdoorDistSq, ctx, clipResult, visitor ); } } if ( collectFlags & COLLECT_MOBS - && RendererState.RendererSettings.DrawMobs && dist < vobOutdoorSmallDist ) { - CVVH_AddNotDrawnVobToList( listD, vobOutdoorDist, ctx, clipResult, visitor); + && RendererState.RendererSettings.DrawMobs && distSq < vobOutdoorSmallDistSq ) { + CVVH_AddNotDrawnVobToList( listD, vobOutdoorDistSq, ctx, clipResult, visitor); } if ( collectFlags & COLLECT_LIGHTS - && RendererState.RendererSettings.EnableDynamicLighting && dist < visualFXDrawRadius ) { - - bool markSeen = (collectFlags & COLLECT_MUTATE) != 0; + && RendererState.RendererSettings.EnableDynamicLighting && distSq < visualFXDrawRadiusSq ) { // Add dynamic lights for ( int i = 0; i < leaf->LightVobList.NumInArray; i++ ) { zCVobLight* vob = leaf->LightVobList.Array[i]; - const float lightCameraDist = XMVectorGetX( XMVector3Length( cameraPosition - vob->GetPositionWorldXM() ) ); + const float lightCameraDist = checkDist + ? 
XMVectorGetX( XMVector3Length( cameraPosition - vob->GetPositionWorldXM() ) ) + : 0; + if ( lightCameraDist + vob->GetLightRange() < visualFXDrawRadius ) { BoundingSphere lightSphere; @@ -5716,9 +5726,9 @@ static void CollectVisibleVobsHelper( BspInfo* base, continue; } - // Check if we already have this light - auto vit = VobLightMap.find( vob ); - if ( vit == VobLightMap.end() ) { + // Check if we already have this light, insert if new + auto [vit, inserted] = VobLights.insert( vob, nullptr ); + if ( inserted ) { bool PFXVobLight = false; if ( zCVob* parent = vob->GetVobParent() ) { if ( parent->As() ) { @@ -5726,12 +5736,12 @@ static void CollectVisibleVobsHelper( BspInfo* base, } } - // Add if not. This light must have been added during gameplay + // This light must have been added during gameplay VobLightInfo* vi = new VobLightInfo; vi->Vob = vob; vi->IsPFXVobLight = PFXVobLight; vi->UpdateShadows = !PFXVobLight; - vit = VobLightMap.emplace( vob, vi ).first; + vit->second = vi; // Create shadow-buffers for these lights since it was dynamically added to the world if ( !vi->IsPFXVobLight && RendererState.RendererSettings.EnablePointlightShadows >= GothicRendererSettings::PLS_STATIC_ONLY ) { @@ -5758,15 +5768,17 @@ static void CollectVisibleVobsHelper( BspInfo* base, boxCell.Max.y = node->BBox3D.Min.y; zTBBox3D tmpbox = boxCell; - float plane_normal; - XMStoreFloat( &plane_normal, XMVector3Dot( XMLoadFloat3( &node->Plane.Normal ), cameraPosition ) ); + float plane_normal = FLT_MAX; + if ( checkDist ) XMStoreFloat( &plane_normal, XMVector3Dot( XMLoadFloat3( &node->Plane.Normal ), cameraPosition ) ); + if ( plane_normal > node->Plane.Distance ) { if ( node->Front ) { reinterpret_cast(&tmpbox.Min)[planeAxis] = node->Plane.Distance; CollectVisibleVobsHelper( base->Front, tmpbox, ctx, visitor, clipResult, - yMaxWorld ); + yMaxWorld, + collectFlags); } reinterpret_cast(&boxCell.Max)[planeAxis] = node->Plane.Distance; @@ -5778,7 +5790,8 @@ static void 
CollectVisibleVobsHelper( BspInfo* base, CollectVisibleVobsHelper( base->Back, tmpbox, ctx, visitor, clipResult, - yMaxWorld ); + yMaxWorld, + collectFlags ); } reinterpret_cast(&boxCell.Min)[planeAxis] = node->Plane.Distance; @@ -5789,7 +5802,199 @@ static void CollectVisibleVobsHelper( BspInfo* base, } } -void GothicAPI::CollectVisibleVobs( const RndCullContext& ctx ) { +struct BspTraversalNode { + BspInfo* base; + zTBBox3D boxCell; + DirectX::ContainmentType inheritedContainment; +}; + +static void CollectVisibleVobsHelperNonRecursive( BspInfo* base, + zTBBox3D boxCell, + const RndCullContext& ctx, + BspTreeVobVisitor* visitor, + DirectX::ContainmentType inheritedContainment, + float yMaxWorld, + EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_NO_MUTATE +) { + const float vobIndoorDistSq = ctx.drawDistances.IndoorVobs * ctx.drawDistances.IndoorVobs; + const float vobOutdoorDistSq = ctx.drawDistances.OutdoorVobs * ctx.drawDistances.OutdoorVobs; + const float vobOutdoorSmallDistSq = ctx.drawDistances.OutdoorVobsSmall * ctx.drawDistances.OutdoorVobsSmall; + + const float visualFXDrawRadius = ctx.drawDistances.VisualFX; + const float visualFXDrawRadiusSq = ctx.drawDistances.VisualFX * ctx.drawDistances.VisualFX; + const XMFLOAT3 camPos = ctx.cameraPosition; + + const bool checkDist = (collectFlags & COLLECT_DISABLE_CHECK_DIST) == 0; + + // Cache globals outside the traversal loop to prevent redundant memory fetches + const auto& RendererState = Engine::GAPI->GetRendererState(); + auto& VobLights = Engine::GAPI->VobLights_Sorted; + + // Pre-allocate a small stack to eliminate recursion entirely + // 64 is exceptionally deep for a BSP tree, ensuring we won't overflow + BspTraversalNode stack[64]; + int stackPtr = 0; + + stack[stackPtr++] = { base, boxCell, inheritedContainment }; + + while ( stackPtr > 0 ) { + BspTraversalNode current = stack[--stackPtr]; + BspInfo* currBase = current.base; + zTBBox3D currBox = current.boxCell; + ContainmentType 
clipResult = current.inheritedContainment; + + // The original tail-recursion loop + while ( currBase && currBase->OriginalNode ) { + + if ( RendererState.RendererSettings.EnableOcclusionCulling && !currBase->OcclusionInfo.VisibleLastFrame ) { + break; // Proceed to next item in the stack + } + + zTBBox3D nodeBox = currBase->OriginalNode->BBox3D; + float nodeYMax = std::min( yMaxWorld, camPos.y ); + nodeYMax = std::max( nodeYMax, currBase->OriginalNode->BBox3D.Max.y ); + nodeBox.Max.y = nodeYMax; + + const float distSq = checkDist + ? Toolbox::ComputePointAABBDistanceSq( camPos, currBase->OriginalNode->BBox3D.Min, currBase->OriginalNode->BBox3D.Max ) + : 0; + + if ( distSq < vobOutdoorDistSq ) { + if ( !RendererState.RendererSettings.EnableOcclusionCulling ) { + if ( clipResult != ContainmentType::CONTAINS ) { + clipResult = ctx.frustum.Contains( Frustum::BBoxFromzTBBox3D( nodeBox ) ); + } + } else { + switch ( static_cast( currBase->OcclusionInfo.LastCameraClipType ) ) { + case zTCam_ClipType::ZTCAM_CLIPTYPE_IN: clipResult = ContainmentType::CONTAINS; break; + case zTCam_ClipType::ZTCAM_CLIPTYPE_CROSSING: clipResult = ContainmentType::INTERSECTS; break; + case zTCam_ClipType::ZTCAM_CLIPTYPE_OUT: clipResult = ContainmentType::DISJOINT; break; + } + } + + if ( clipResult == ContainmentType::DISJOINT ) { + break; + } + } else { + break; // Too far + } + + if ( currBase->OriginalNode->IsLeaf() ) { + zCBspLeaf* leaf = static_cast(currBase->OriginalNode); + + if ( collectFlags & COLLECT_VOBS && RendererState.RendererSettings.DrawVOBs ) { + if ( collectFlags & COLLECT_INDOOR_VOBS && distSq < vobIndoorDistSq ) { + CVVH_AddNotDrawnVobToList( currBase->IndoorVobs, vobIndoorDistSq, ctx, clipResult, visitor ); + } + if ( distSq < vobOutdoorSmallDistSq ) { + CVVH_AddNotDrawnVobToList( currBase->SmallVobs, vobOutdoorSmallDistSq, ctx, clipResult, visitor ); + } + if ( distSq < vobOutdoorDistSq ) { + CVVH_AddNotDrawnVobToList( currBase->Vobs, vobOutdoorDistSq, ctx, clipResult, 
visitor ); + } + } + + if ( collectFlags & COLLECT_MOBS && RendererState.RendererSettings.DrawMobs && distSq < vobOutdoorSmallDistSq ) { + CVVH_AddNotDrawnVobToList( currBase->Mobs, vobOutdoorDistSq, ctx, clipResult, visitor ); + } + + if ( collectFlags & COLLECT_LIGHTS && RendererState.RendererSettings.EnableDynamicLighting && distSq < visualFXDrawRadiusSq ) { + for ( int i = 0; i < leaf->LightVobList.NumInArray; i++ ) { + zCVobLight* vob = leaf->LightVobList.Array[i]; + + // Avoid square root by using squared distances + bool inRange = false; + if ( checkDist ) { + float range = vob->GetLightRange(); + float threshold = visualFXDrawRadius - range; + + if ( threshold > 0.0f ) { + XMFLOAT3 vobPos = vob->GetPositionWorld(); + float dx = camPos.x - vobPos.x; + float dy = camPos.y - vobPos.y; + float dz = camPos.z - vobPos.z; + float distSq = dx * dx + dy * dy + dz * dz; + inRange = distSq < (threshold * threshold); + } + } else { + inRange = true; + } + + if ( inRange ) { + BoundingSphere lightSphere; + lightSphere.Center = vob->GetPositionWorld(); + lightSphere.Radius = vob->GetLightRange(); + + if ( clipResult != ContainmentType::CONTAINS && !ctx.frustum.Intersects( lightSphere ) ) { + continue; + } + + auto [vit, inserted] = VobLights.insert( vob, nullptr ); + if ( inserted ) { + bool PFXVobLight = false; + if ( zCVob* parent = vob->GetVobParent() ) { + if ( parent->As() ) PFXVobLight = true; + } + + VobLightInfo* vi = new VobLightInfo; + vi->Vob = vob; + vi->IsPFXVobLight = PFXVobLight; + vi->UpdateShadows = !PFXVobLight; + vit->second = vi; + + if ( !vi->IsPFXVobLight && RendererState.RendererSettings.EnablePointlightShadows >= GothicRendererSettings::PLS_STATIC_ONLY ) { + BaseShadowedPointLight* pl; + Engine::GraphicsEngine->CreateShadowedPointLight( &pl, vi, true ); + vi->LightShadowBuffers.reset( pl ); + } + } + + VobLightInfo* vi = vit->second; + if ( vi->VisibleInRenderPass ) continue; + visitor->Visit( vi ); + ctx.queue->PushLightVob( vi ); + } + } + } + 
break; // Break the inner tail-recursion loop to pop the next stack item + } else { + zCBspNode* node = static_cast(currBase->OriginalNode); + int planeAxis = node->PlaneSignbits; + + currBox.Min.y = node->BBox3D.Min.y; + currBox.Max.y = node->BBox3D.Max.y; + + zTBBox3D tmpbox = currBox; + float plane_normal = FLT_MAX; + + // Scalar math to avoid Load-Hit-Store SIMD stalls + if ( checkDist ) { + plane_normal = (node->Plane.Normal.x * camPos.x) + + (node->Plane.Normal.y * camPos.y) + + (node->Plane.Normal.z * camPos.z); + } + + if ( plane_normal > node->Plane.Distance ) { + if ( node->Front ) { + reinterpret_cast(&tmpbox.Min)[planeAxis] = node->Plane.Distance; + stack[stackPtr++] = { currBase->Front, tmpbox, clipResult }; + } + reinterpret_cast(&currBox.Max)[planeAxis] = node->Plane.Distance; + currBase = currBase->Back; + } else { + if ( node->Back ) { + reinterpret_cast(&tmpbox.Max)[planeAxis] = node->Plane.Distance; + stack[stackPtr++] = { currBase->Back, tmpbox, clipResult }; + } + reinterpret_cast(&currBox.Min)[planeAxis] = node->Plane.Distance; + currBase = currBase->Front; + } + } + } + } +} + +void GothicAPI::CollectVisibleVobs( const RndCullContext& ctx, EBspTreeCollectFlags collectFlags ) { zCBspTree* tree = LoadedWorldInfo->BspTree; zCBspBase* rootBsp = tree->GetRootNode(); @@ -5798,11 +6003,12 @@ void GothicAPI::CollectVisibleVobs( const RndCullContext& ctx ) { static thread_local BspTreeVobVisitor bspVobVisitor{}; // Recursively go through the tree and draw all nodes - CollectVisibleVobsHelper( root, root->OriginalNode->BBox3D, + CollectVisibleVobsHelperNonRecursive( root, root->OriginalNode->BBox3D, ctx, &bspVobVisitor, ContainmentType::INTERSECTS, - Engine::GAPI->GetLoadedWorldInfo()->BspTree->GetRootNode()->BBox3D.Max.y + Engine::GAPI->GetLoadedWorldInfo()->BspTree->GetRootNode()->BBox3D.Max.y, + collectFlags ); FXMVECTOR camPos = XMLoadFloat3( &ctx.cameraPosition ); @@ -5816,7 +6022,8 @@ void GothicAPI::CollectVisibleVobs( const RndCullContext& ctx 
) { std::list removeList; // TODO: This should not be needed! // Add visible dynamically added vobs - if ( RendererState.RendererSettings.DrawVOBs ) { + if ( RendererState.RendererSettings.DrawVOBs + && (collectFlags & EBspTreeCollectFlags::COLLECT_DYNAMIC_VOBS)) { float dist; for ( VobInfo* it : DynamicallyAddedVobs ) { if ( it->VisibleInRenderPass ) continue; diff --git a/D3D11Engine/GothicAPI.h b/D3D11Engine/GothicAPI.h index 0f4a1239..50cf7b85 100644 --- a/D3D11Engine/GothicAPI.h +++ b/D3D11Engine/GothicAPI.h @@ -49,15 +49,17 @@ struct RndCullContext { }; enum EBspTreeCollectFlags : unsigned int { - COLLECT_VOBS = 1 << 0, + COLLECT_VOBS = 1 << 0, // static vobs COLLECT_LIGHTS = 1 << 1, - COLLECT_MOBS = 1 << 2, - COLLECT_INDOOR_VOBS = 1 << 3, + COLLECT_MOBS = 1 << 2, // skeletal mobs + COLLECT_INDOOR_VOBS = 1 << 3, // indoor vobs + COLLECT_DYNAMIC_VOBS = 1 << 4, // dynamic static / transparent vobs - COLLECT_ALL_VOBS = COLLECT_VOBS | COLLECT_INDOOR_VOBS, + COLLECT_ALL_VOBS = COLLECT_VOBS | COLLECT_INDOOR_VOBS | COLLECT_DYNAMIC_VOBS, + COLLECT_DISABLE_CHECK_DIST = 1 << 29, COLLECT_MUTATE = 1 << 30, - COLLECT_ALL_MUTATE = 0xFFFFFFFF, + COLLECT_ALL_MUTATE = 0xFFFFFFFF & ~(COLLECT_DISABLE_CHECK_DIST), COLLECT_ALL_NO_MUTATE = COLLECT_ALL_MUTATE & ~COLLECT_MUTATE, }; @@ -225,6 +227,54 @@ class GVegetationBox; class zCMorphMesh; class zCDecal; +// Minimal flat-map: always-sorted vector of pairs for O(log n) binary-search lookups. +// All methods inline to the same lower_bound calls — zero overhead over hand-written code. +template +struct SortedPairVector { + using Entry = std::pair; + using iterator = typename std::vector::iterator; + using const_iterator = typename std::vector::const_iterator; + + iterator begin() { return m_data.begin(); } + iterator end() { return m_data.end(); } + const_iterator begin() const { return m_data.begin(); } + const_iterator end() const { return m_data.end(); } + + // Binary search for key. Returns end() if not found. 
+ __forceinline iterator find( Key key ) { + auto it = std::lower_bound( m_data.begin(), m_data.end(), key, Cmp{} ); + return (it != m_data.end() && it->first == key) ? it : m_data.end(); + } + + // Insert {key, value} maintaining sort order. If key already exists, does nothing. + // Returns {iterator_to_element, true_if_newly_inserted}. + __forceinline std::pair insert( Key key, Value value ) { + auto it = std::lower_bound( m_data.begin(), m_data.end(), key, Cmp{} ); + if ( it != m_data.end() && it->first == key ) + return { it, false }; + return { m_data.insert( it, { key, value } ), true }; + } + + // Erase by key. Returns true if found and erased. + __forceinline bool erase( Key key ) { + auto it = find( key ); + if ( it == m_data.end() ) return false; + m_data.erase( it ); + return true; + } + + void clear() { m_data.clear(); } + bool empty() const { return m_data.empty(); } + size_t size() const { return m_data.size(); } + void reserve( size_t n ) { m_data.reserve( n ); } + +private: + struct Cmp { + bool operator()( const Entry& a, Key k ) const { return a.first < k; } + }; + std::vector m_data; +}; + class GothicAPI { public: GothicAPI(); @@ -569,7 +619,7 @@ class GothicAPI { EGothicCullFlags cullFlags = EGothicCullFlags::CullAll, EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_MUTATE); - void CollectVisibleVobs( const RndCullContext& ctx ); + void CollectVisibleVobs(const RndCullContext& ctx, EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_NO_MUTATE); /** Collects visible sections from the current camera perspective */ void CollectVisibleSections( std::vector& sections ); @@ -789,7 +839,6 @@ class GothicAPI { float GetSkyTimeScale(); static void ProcessVobAnimation( zCVob* vob, zTAnimationMode aniMode, VobInstanceInfo& vobInstance ); - private: /** Collects polygons in the given AABB */ void CollectPolygonsInAABBRec( BspInfo* base, const zTBBox3D& bbox, std::vector& list ); @@ -879,7 +928,8 @@ class GothicAPI { 
std::unordered_map VobMap; public: // temporarily, to allow CollectVisibleVobsHelper to be templated for inlining optimizations - phmap::flat_hash_map VobLightMap; + // Sorted by zCVobLight* for binary-search lookups + SortedPairVector VobLights_Sorted; private: phmap::flat_hash_map SkeletalVobMap; diff --git a/D3D11Engine/GothicGraphicsState.h b/D3D11Engine/GothicGraphicsState.h index c62b4820..044b3614 100644 --- a/D3D11Engine/GothicGraphicsState.h +++ b/D3D11Engine/GothicGraphicsState.h @@ -117,18 +117,13 @@ struct GothicGraphicsState { }; __declspec(align(4)) struct GothicPipelineState { - /** Sets this state dirty, which means that it will be updated before next rendering */ - void SetDirty() { - StateDirty = true; - HashThis( reinterpret_cast(this), StructSize ); - } - - /** Hashes the whole struct */ - void HashThis( char* data, int size ) { + /** Recomputes the hash from current state data. Called automatically by UpdateRenderStates(). */ + void ComputeHash() { Hash = 0; - // Start hashing at the data of the other structs, skip the data of this one - for ( int i = sizeof( GothicPipelineState ); i < size; i += 4 ) { + // Hash the derived struct data, skipping the base GothicPipelineState fields + char* data = reinterpret_cast(this); + for ( int i = sizeof( GothicPipelineState ); i < StructSize; i += 4 ) { DWORD d; memcpy( &d, data + i, 4 ); @@ -140,7 +135,6 @@ __declspec(align(4)) struct GothicPipelineState { return Hash == o.Hash; } - bool StateDirty; size_t Hash; int StructSize; }; @@ -220,7 +214,6 @@ struct GothicDepthBufferStateInfo : public GothicPipelineState { c.DepthWriteEnabled = DepthWriteEnabled; c.DepthBufferCompareFunc = DepthBufferCompareFunc; - c.StateDirty = StateDirty; c.Hash = Hash; c.StructSize = StructSize; return c; @@ -232,7 +225,6 @@ struct GothicDepthBufferStateInfo : public GothicPipelineState { c.DepthBufferCompareFunc = DepthBufferCompareFunc; c.StructSize = StructSize; - c.SetDirty(); } }; @@ -371,7 +363,6 @@ struct 
GothicBlendStateInfo : public GothicPipelineState { c.AlphaToCoverage = AlphaToCoverage; c.ColorWritesEnabled = ColorWritesEnabled; - c.StateDirty = StateDirty; c.Hash = Hash; c.StructSize = StructSize; return c; @@ -389,7 +380,6 @@ struct GothicBlendStateInfo : public GothicPipelineState { c.ColorWritesEnabled = ColorWritesEnabled; c.StructSize = StructSize; - c.SetDirty(); } }; @@ -640,6 +630,7 @@ struct GothicRendererSettings { WireframeVobs = false; WireframeWorld = false; DrawShadowGeometry = true; + UseIndirectVobShadows = false; FixViewFrustum = false; DisableWatermark = true; DisableRendering = false; @@ -828,6 +819,7 @@ struct GothicRendererSettings { int ShadowCascadePCFLimit; E_ShadowFrustumCulling ShadowFrustumCullingMode; bool DrawShadowGeometry; + bool UseIndirectVobShadows; bool VegetationAlphaToCoverage; bool DisableWatermark; bool DisableRendering; @@ -968,6 +960,8 @@ struct GothicRendererSettings { bool UseLayeredRendering; bool UseShadowAtlas; bool ForceFeatureLevel10; + bool EnableAtlasStaticVobs; + bool EnableAtlasWorldMesh; } FeatureSet; } DebugSettings; }; @@ -1094,10 +1088,10 @@ struct GothicRendererState { TransformState.SetDefault(); RendererSettings.SetDefault(); - DepthState.SetDirty(); - BlendState.SetDirty(); - RasterizerState.SetDirty(); - SamplerState.SetDirty(); + DepthState.ComputeHash(); + BlendState.ComputeHash(); + RasterizerState.ComputeHash(); + SamplerState.ComputeHash(); } GothicDepthBufferStateInfo DepthState; diff --git a/D3D11Engine/ImGuiShim.cpp b/D3D11Engine/ImGuiShim.cpp index 4be20a31..c04d0131 100644 --- a/D3D11Engine/ImGuiShim.cpp +++ b/D3D11Engine/ImGuiShim.cpp @@ -1321,6 +1321,7 @@ void RenderAdvancedColumn2( GothicRendererSettings& settings, GothicAPI* gapi ) if (ImGui::BeginTabItem("Shadows", nullptr, ImGuiTabItemFlags_::ImGuiTabItemFlags_NoReorder)) { ImGui::Checkbox("Lazy update", &settings.DebugSettings.ShadowCascades.LazyCascadeUpdate ); ImGui::SetItemTooltip("Update last cascades less frequently to save 
performance, may cause uneven frametimes"); + ImGui::Checkbox("Indirect", &settings.UseIndirectVobShadows ); ImGui::SliderFloat("Extend Back", &settings.DebugSettings.ShadowCascades.ExtendBack, -10000, 50000, "%.0f"); ImGui::SliderFloat("Extend Front", &settings.DebugSettings.ShadowCascades.ExtendFront, -10000, 50000, "%.0f"); @@ -1351,6 +1352,10 @@ void RenderAdvancedColumn2( GothicRendererSettings& settings, GothicAPI* gapi ) ImGui::SetItemTooltip("Enables a less intensive but lower quality shadow solution."); ImGui::Checkbox("Force Feature Level 10", &settings.DebugSettings.FeatureSet.ForceFeatureLevel10 ); ImGui::SetItemTooltip("Force DirectX 10 era feature support. Requires restart."); + ImGui::Checkbox("Atlas Static Vobs", &settings.DebugSettings.FeatureSet.EnableAtlasStaticVobs ); + ImGui::SetItemTooltip("Enable texture atlas based rendering for static vobs (experimental, requires world reload)"); + ImGui::Checkbox("Atlas World Mesh", &settings.DebugSettings.FeatureSet.EnableAtlasWorldMesh ); + ImGui::SetItemTooltip("Enable texture atlas based rendering for world mesh (experimental, requires world reload)"); ImGui::EndTabItem(); } diff --git a/D3D11Engine/SMAA/D3D11SMAA.cpp b/D3D11Engine/SMAA/D3D11SMAA.cpp index 0747a2be..284ed2dc 100644 --- a/D3D11Engine/SMAA/D3D11SMAA.cpp +++ b/D3D11Engine/SMAA/D3D11SMAA.cpp @@ -67,24 +67,8 @@ bool D3D11SMAA::Init() sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; // Point filter m_device->CreateSamplerState(&sampDesc, m_samplerPoint.GetAddressOf()); - // 5. 
Create Helper States - D3D11_RASTERIZER_DESC rasterDesc = {}; - rasterDesc.FillMode = D3D11_FILL_SOLID; - rasterDesc.CullMode = D3D11_CULL_NONE; - rasterDesc.DepthClipEnable = true; - m_device->CreateRasterizerState(&rasterDesc, m_rasterizerState.GetAddressOf()); - - D3D11_DEPTH_STENCIL_DESC dsDesc = {}; - dsDesc.DepthEnable = FALSE; - dsDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; - dsDesc.DepthFunc = D3D11_COMPARISON_ALWAYS; - m_device->CreateDepthStencilState(&dsDesc, m_disableDepthState.GetAddressOf()); - - // Default blend state (Opaque/Overwrite) - D3D11_BLEND_DESC blendDesc = {}; - blendDesc.RenderTarget[0].BlendEnable = FALSE; - blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; - m_device->CreateBlendState(&blendDesc, m_blendState.GetAddressOf()); + // Note: Rasterizer, depth-stencil, and blend states are managed by the caller + // through the Gothic state tracking system. return true; } @@ -131,9 +115,8 @@ void D3D11SMAA::Render(ID3D11ShaderResourceView* inputSRV, // Common State Setup m_context->IASetInputLayout(nullptr); // Using VertexID generation m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - m_context->RSSetState(m_rasterizerState.Get()); - m_context->OMSetDepthStencilState(m_disableDepthState.Get(), 0); - m_context->OMSetBlendState(m_blendState.Get(), nullptr, 0xFFFFFFFF); + // Note: Rasterizer, depth-stencil, and blend states are configured by the caller + // through the Gothic state tracking system (Engine::GAPI->GetRendererState()). 
ID3D11SamplerState* samplers[] = { m_samplerLinear.Get(), m_samplerPoint.Get() }; m_context->PSSetSamplers(0, 2, samplers); @@ -220,7 +203,4 @@ void D3D11SMAA::ReleaseResources() { m_constantBuffer.Reset(); m_samplerLinear.Reset(); m_samplerPoint.Reset(); - m_rasterizerState.Reset(); - m_disableDepthState.Reset(); - m_blendState.Reset(); } diff --git a/D3D11Engine/SMAA/D3D11SMAA.h b/D3D11Engine/SMAA/D3D11SMAA.h index 6beece45..3339b469 100644 --- a/D3D11Engine/SMAA/D3D11SMAA.h +++ b/D3D11Engine/SMAA/D3D11SMAA.h @@ -68,9 +68,6 @@ class D3D11SMAA { Microsoft::WRL::ComPtr m_constantBuffer; Microsoft::WRL::ComPtr m_samplerLinear; Microsoft::WRL::ComPtr m_samplerPoint; - Microsoft::WRL::ComPtr m_rasterizerState; - Microsoft::WRL::ComPtr m_disableDepthState; - Microsoft::WRL::ComPtr m_blendState; // Default (overwrite off) int m_width; int m_height; diff --git a/D3D11Engine/ShaderIDs.h b/D3D11Engine/ShaderIDs.h index 6d42ef02..082f04aa 100644 --- a/D3D11Engine/ShaderIDs.h +++ b/D3D11Engine/ShaderIDs.h @@ -17,6 +17,7 @@ enum class VShaderID : size_t { VS_XYZRHW_DIF_T1, VS_ExInstancedObj, VS_ExInstanced, + VS_ExInstancedObjIndirectAtlas, VS_GrassInstanced, VS_Lines, VS_Lines_XYZRHW, @@ -29,6 +30,7 @@ enum class VShaderID : size_t { VS_ExCube, VS_ExNodeCube, VS_ExSkeletalCube, + VS_ExWorldAtlas, COUNT }; @@ -40,6 +42,7 @@ enum class PShaderID : size_t { PS_Rain_Snow, PS_Transparency, PS_World, + PS_WorldAtlas, PS_Water, PS_ParticleDistortion, PS_PFX_ApplyParticleDistortion, @@ -77,6 +80,9 @@ enum class PShaderID : size_t { PS_DiffuseAlphaTestShadows, PS_DiffuseNormalmappedAlphaTest, PS_DiffuseNormalmappedAlphaTestFxMap, + PS_DiffuseAtlas, + PS_DiffuseAtlasAlphaTest, + PS_DiffuseAtlasAlphaTestShadows, PS_Preview_White, PS_Preview_Textured, PS_Preview_TexturedLit, @@ -109,5 +115,7 @@ enum class CShaderID : size_t { CS_AdvanceRain, CS_LightCulling, CS_TiledShading, + CS_CullVobs, + CS_BuildHiZ, COUNT }; diff --git a/D3D11Engine/Shaders/CS_BuildHiZ.hlsl 
b/D3D11Engine/Shaders/CS_BuildHiZ.hlsl new file mode 100644 index 00000000..bfdb97e4 --- /dev/null +++ b/D3D11Engine/Shaders/CS_BuildHiZ.hlsl @@ -0,0 +1,57 @@ +//-------------------------------------------------------------------------------------- +// Hi-Z Pyramid Build Compute Shader +// Builds a MAX-depth mip chain for hierarchical occlusion culling (reversed-Z). +// Each mip texel stores the NEAREST depth (highest reversed-Z) in its 2x2 source region. +// Mip 0: copy from depth buffer. +// Mip N>0: 2x2 MAX downsample from previous mip. +// +// D3D11 forbids binding the same resource as both SRV and UAV, so we use a +// scratch texture as the UAV target, then CopySubresourceRegion into the +// real Hi-Z texture after each dispatch. +//-------------------------------------------------------------------------------------- + +cbuffer HiZCB : register( b0 ) +{ + uint outputWidth; + uint outputHeight; + uint inputMipLevel; + uint isCopyPass; // 1 = mip 0 (copy from depth), 0 = downsample +}; + +Texture2D InputTexture : register( t0 ); +RWTexture2D OutputTexture : register( u0 ); + +[numthreads( 8, 8, 1 )] +void CSMain( uint3 DTid : SV_DispatchThreadID ) +{ + if ( DTid.x >= outputWidth || DTid.y >= outputHeight ) + return; + + if ( isCopyPass ) + { + // Mip 0: straight copy from the depth buffer (reversed-Z, so 0 = far) + OutputTexture[DTid.xy] = InputTexture.Load( int3( DTid.xy, 0 ) ); + } + else + { + // 2x2 MAX downsample from the previous mip level of the Hi-Z texture. + // With reversed-Z depth (near=1, far=0), we take the MAX to get the + // NEAREST (closest to camera) surface per tile. + // + // CS_CullVobs then takes the MIN across footprint texels of this MAX chain, + // finding the least-occluded tile in the AABB's screen projection. + // The test "maxDepth < hiZDepth" passes only when the AABB's nearest corner + // (maxDepth) is farther than the nearest occluder in every tile of the footprint. 
+ // + // Using MIN here instead would collapse every tile touching the sky to ~0, + // making the test never fire since depth values are non-negative. + uint2 srcBase = DTid.xy * 2; + + float d00 = InputTexture.Load( int3( srcBase + uint2( 0, 0 ), inputMipLevel ) ); + float d10 = InputTexture.Load( int3( srcBase + uint2( 1, 0 ), inputMipLevel ) ); + float d01 = InputTexture.Load( int3( srcBase + uint2( 0, 1 ), inputMipLevel ) ); + float d11 = InputTexture.Load( int3( srcBase + uint2( 1, 1 ), inputMipLevel ) ); + + OutputTexture[DTid.xy] = max( max( d00, d10 ), max( d01, d11 ) ); + } +} diff --git a/D3D11Engine/Shaders/CS_CullVobs.hlsl b/D3D11Engine/Shaders/CS_CullVobs.hlsl new file mode 100644 index 00000000..7b78232b --- /dev/null +++ b/D3D11Engine/Shaders/CS_CullVobs.hlsl @@ -0,0 +1,256 @@ +//-------------------------------------------------------------------------------------- +// GPU Frustum + Distance Culling Compute Shader +// Tests each vob AABB against 6 frustum planes + draw distance, +// writes visible instances to RWStructuredBuffer and atomically +// increments InstanceCount in the indirect args buffer. 
+//-------------------------------------------------------------------------------------- + +cbuffer CullCB : register( b0 ) +{ + float4 frustumPlanes[6]; + float3 cameraPosition; + float drawDistance; + float globalWindStrength; + uint windAdvanced; + uint numVobs; + uint feedbackFrameNumber; + uint enableHiZ; + uint hiZMipCount; + float hiZWidth; + float hiZHeight; + float4x4 viewProjection; +}; + +struct VobGPUData +{ + float3 aabbCenter; + float pad0; + float3 aabbExtent; + float pad1; + float4x4 world; + float4x4 prevWorld; + uint color; + float aniModeStrength; + float canBeAffectedByPlayer; + uint submeshStart; + uint submeshCount; + float minHeight; + float maxHeight; + uint pad2; +}; + +struct SubmeshGPUData +{ + int slice; + float uStart; + float vStart; + float uEnd; + float vEnd; + uint argIndex; + uint instanceBaseOffset; + uint globalSourceIndex; +}; + +struct VobInstanceInfoAtlas +{ + float4x4 world; + float4x4 prevWorld; + uint color; + float windStrength; + float canBeAffectedByPlayer; + int slice; + float uStart; + float vStart; + float uEnd; + float vEnd; + uint globalSourceIndex; + float minHeight; + float maxHeight; +}; + +StructuredBuffer VobBuffer : register( t0 ); +StructuredBuffer SubmeshBuffer : register( t1 ); +Texture2D HiZTexture : register( t2 ); +RWStructuredBuffer InstanceOutput : register( u0 ); +RWByteAddressBuffer IndirectArgsUAV : register( u1 ); + +// GPU feedback for streaming: source-indexed RWTexture2D +// The CS stamps visible sources once per (vob, submesh) — orders of magnitude +// cheaper than per-pixel atomics in the pixel shader. +RWTexture2D FeedbackUAV : register( u5 ); + +// Hi-Z occlusion test: project AABB to screen, pick mip level, compare depth. +// Returns true if the AABB is OCCLUDED (should be culled). 
+bool IsOccludedHiZ( float3 aabbCenter, float3 aabbExtent ) +{ + // Generate all 8 corners of the AABB + float3 corners[8]; + corners[0] = aabbCenter + float3( -aabbExtent.x, -aabbExtent.y, -aabbExtent.z ); + corners[1] = aabbCenter + float3( aabbExtent.x, -aabbExtent.y, -aabbExtent.z ); + corners[2] = aabbCenter + float3( -aabbExtent.x, aabbExtent.y, -aabbExtent.z ); + corners[3] = aabbCenter + float3( aabbExtent.x, aabbExtent.y, -aabbExtent.z ); + corners[4] = aabbCenter + float3( -aabbExtent.x, -aabbExtent.y, aabbExtent.z ); + corners[5] = aabbCenter + float3( aabbExtent.x, -aabbExtent.y, aabbExtent.z ); + corners[6] = aabbCenter + float3( -aabbExtent.x, aabbExtent.y, aabbExtent.z ); + corners[7] = aabbCenter + float3( aabbExtent.x, aabbExtent.y, aabbExtent.z ); + + float minX = 1.0, minY = 1.0, maxX = 0.0, maxY = 0.0; + float maxDepth = 0.0; // Reversed-Z: nearest corner has the highest Z. Track max across corners. + + [unroll] + for ( int i = 0; i < 8; i++ ) + { + float4 clip = mul( float4( corners[i], 1.0 ), viewProjection ); + + // Behind camera — can't occlude, bail out as visible + if ( clip.w <= 0.0 ) + return false; + + float3 ndc = clip.xyz / clip.w; + + // NDC to UV [0,1] range (Y is flipped for texture space) + float u = ndc.x * 0.5 + 0.5; + float v = -ndc.y * 0.5 + 0.5; + + minX = min( minX, u ); + maxX = max( maxX, u ); + minY = min( minY, v ); + maxY = max( maxY, v ); + + // Track the nearest AABB corner (highest Z in reversed-Z) + maxDepth = max( maxDepth, ndc.z ); + } + + // Clamp to screen bounds + minX = saturate( minX ); + maxX = saturate( maxX ); + minY = saturate( minY ); + maxY = saturate( maxY ); + + // Degenerate or off-screen — treat as visible + if ( minX >= maxX || minY >= maxY ) + return false; + + // Compute screen-space size in pixels at mip 0 + float sizeX = ( maxX - minX ) * hiZWidth; + float sizeY = ( maxY - minY ) * hiZHeight; + float maxSize = max( sizeX, sizeY ); + + // Pick mip level: we want the mip where the AABB covers 
roughly 2x2 texels + float mipF = ceil( log2( max( maxSize, 1.0 ) ) ); + uint mip = min( (uint)mipF, hiZMipCount - 1 ); + + // Compute texel coordinates at this mip level + float mipWidth = max( hiZWidth / (float)( 1u << mip ), 1.0 ); + float mipHeight = max( hiZHeight / (float)( 1u << mip ), 1.0 ); + + int2 texMin = int2( minX * mipWidth, minY * mipHeight ); + int2 texMax = int2( maxX * mipWidth, maxY * mipHeight ); + + // Clamp to valid range + texMin = max( texMin, int2( 0, 0 ) ); + texMax = min( texMax, int2( (int)mipWidth - 1, (int)mipHeight - 1 ) ); + + // Sample Hi-Z: take the min depth across the covered texels. + // The Hi-Z pyramid is a MAX mip chain: each texel stores the nearest depth in its region (reversed-Z: largest Z = nearest). + // Taking the min across the footprint selects the least-occluded tile, i.e. the farthest of those per-tile nearest depths. + float hiZDepth = 1.0; + for ( int y = texMin.y; y <= texMax.y; y++ ) + { + for ( int x = texMin.x; x <= texMax.x; x++ ) + { + hiZDepth = min( hiZDepth, HiZTexture.Load( int3( x, y, mip ) ) ); + } + } + + // Reversed-Z: near=1, far=0. + // maxDepth = nearest AABB corner (highest Z in reversed-Z). + // HiZ is a MAX mip chain: each texel = nearest surface (highest Z) in its region. + // We take MIN across the AABB footprint texels to find the least-occluded tile. + // AABB is occluded when its nearest corner is farther than the nearest surface + // in every footprint tile, i.e. maxDepth < min(hiZMaxValues) = hiZDepth.
+ return ( maxDepth < hiZDepth ); +} + +[numthreads( 64, 1, 1 )] +void CSMain( uint3 DTid : SV_DispatchThreadID ) +{ + uint idx = DTid.x; + if ( idx >= numVobs ) + return; + + VobGPUData vob = VobBuffer[idx]; + + // Draw distance cull (center-to-camera distance) + float3 toCamera = vob.aabbCenter - cameraPosition; + float distSq = dot( toCamera, toCamera ); + if ( distSq > drawDistance * drawDistance ) + return; + + // Frustum cull: 6-plane AABB test + [unroll] + for ( int p = 0; p < 6; p++ ) + { + float3 n = frustumPlanes[p].xyz; + float d = frustumPlanes[p].w; + float r = dot( abs( n ), vob.aabbExtent ); + float s = dot( n, vob.aabbCenter ) + d; + if ( s - r > 0.0 ) + return; // fully outside this plane + } + + // Hi-Z occlusion cull: test AABB against hierarchical depth buffer + if ( enableHiZ ) + { + if ( IsOccludedHiZ( vob.aabbCenter, vob.aabbExtent ) ) + return; + } + + // Compute wind strength for this vob + float windStr = 0.0; + if ( vob.aniModeStrength > 0.0 && windAdvanced ) + { + windStr = max( 0.1, vob.aniModeStrength ) * globalWindStrength; + } + + // Emit one instance per submesh of this vob + for ( uint s = 0; s < vob.submeshCount; s++ ) + { + SubmeshGPUData sm = SubmeshBuffer[vob.submeshStart + s]; + + // Atomic increment InstanceCount in the indirect args buffer. 
+ // Each D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS is 20 bytes (5 x uint32): + // [0] IndexCountPerInstance + // [4] InstanceCount <-- we increment this + // [8] StartIndexLocation + // [12] BaseVertexLocation + // [16] StartInstanceLocation + uint slot; + IndirectArgsUAV.InterlockedAdd( sm.argIndex * 20 + 4, 1, slot ); + + // Write instance data at the pre-allocated offset + atomic slot + VobInstanceInfoAtlas inst; + inst.world = vob.world; + inst.prevWorld = vob.prevWorld; + inst.color = vob.color; + inst.windStrength = windStr; + inst.canBeAffectedByPlayer = vob.canBeAffectedByPlayer; + inst.slice = sm.slice; + inst.uStart = sm.uStart; + inst.vStart = sm.vStart; + inst.uEnd = sm.uEnd; + inst.vEnd = sm.vEnd; + inst.globalSourceIndex = sm.globalSourceIndex; + inst.minHeight = vob.minHeight; + inst.maxHeight = vob.maxHeight; + + InstanceOutput[sm.instanceBaseOffset + slot] = inst; + + // Stamp feedback: one atomic per visible (vob, submesh) pair. + // Far cheaper than per-pixel atomics in the PS. + if ( feedbackFrameNumber > 0 ) + { + InterlockedMax( FeedbackUAV[uint2( sm.globalSourceIndex, 0 )], feedbackFrameNumber ); + } + } +} diff --git a/D3D11Engine/Shaders/DS_Defines.h b/D3D11Engine/Shaders/DS_Defines.h index 5f31d5fe..a162a26a 100644 --- a/D3D11Engine/Shaders/DS_Defines.h +++ b/D3D11Engine/Shaders/DS_Defines.h @@ -1,7 +1,7 @@ struct DEFERRED_PS_OUTPUT { float4 vDiffuse : SV_TARGET0; - float4 vNrm : SV_TARGET1; + float2 vNrm : SV_TARGET1; float2 vSI_SP : SV_TARGET2; float2 vVelocity : SV_TARGET3; // Screen-space velocity for motion vectors float vReactiveMask : SV_TARGET4; // Screen-space velocity for motion vectors @@ -14,20 +14,25 @@ struct DEFERRED_PS_OUTPUT_ALPHA_TO_COVERAGE uint fCoverage : SV_Coverage; }; +// Octahedral encoding: map a unit normal to [-1,1]^2 for R16G16_SNORM storage +// Reference: "A Survey of Efficient Representations for Independent Unit Vectors" (Cigolle et al. 
2014) +float2 OctWrap(float2 v) +{ + return (1.0 - abs(v.yx)) * (v.xy >= 0.0 ? 1.0 : -1.0); +} - -float2 EncodeNormal(float3 n) +float2 EncodeNormalGBuffer(float3 n) { - float f = sqrt(8*n.z+8); - return n.xy / f + 0.5; + n /= (abs(n.x) + abs(n.y) + abs(n.z)); + n.xy = n.z >= 0.0 ? n.xy : OctWrap(n.xy); + return n.xy; } -float3 DecodeNormal(float2 enc) + +// Decode octahedral [-1,1]^2 back to a unit normal +float3 DecodeNormalGBuffer(float2 encoded) { - float2 fenc = enc.xy*4-2; - float f = dot(fenc,fenc); - float g = sqrt(1-f/4); float3 n; - n.xy = fenc*g; - n.z = 1-f/2; - return n; + n.z = 1.0 - abs(encoded.x) - abs(encoded.y); + n.xy = n.z >= 0.0 ? encoded.xy : OctWrap(encoded.xy); + return normalize(n); } diff --git a/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl b/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl index e36f2496..6bba1c70 100644 --- a/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl +++ b/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl @@ -83,8 +83,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET DEFERRED_PS_OUTPUT output; output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); - output.vNrm.xyz = nrm; - output.vNrm.w = 1.0f; + output.vNrm = EncodeNormalGBuffer(nrm); output.vSI_SP.x = MI_SpecularIntensity; output.vSI_SP.y = MI_SpecularPower; diff --git a/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl b/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl index 92e8103d..e2e2693f 100644 --- a/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl +++ b/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl @@ -694,15 +694,17 @@ float4 PSMain(PS_INPUT Input) : SV_TARGET float4 diffuse = TX_Diffuse.Sample(SS_Linear, uv); float vertLighting = diffuse.a; - // Get the second GBuffer - float4 gb2 = TX_Nrm.Sample(SS_Linear, uv); - - // If we dont have a normal, just return the diffuse color - if (gb2.w < 0.001f) + // Sample depth first to detect sky pixels (reversed-Z: sky has depth == 0.0) + float expDepth = TX_Depth.Sample(SS_Linear, uv).r; + if 
(expDepth < 0.00001f) + // Sky pixel — no geometry was written, just return the diffuse (sky) color return float4(diffuse.rgb, 1); - // Decode the view-space normal back - float3 normal = normalize(gb2.xyz); + // Get the second GBuffer + float2 gb2 = TX_Nrm.Sample(SS_Linear, uv).xy; + + // Decode the view-space normal from octahedral R16G16_SNORM + float3 normal = DecodeNormalGBuffer(gb2); // Get specular parameters float4 gb3 = TX_SI_SP.Sample(SS_Linear, uv); @@ -710,7 +712,6 @@ float4 PSMain(PS_INPUT Input) : SV_TARGET float specPower = gb3.y; // Reconstruct VS World Position from depth - float expDepth = TX_Depth.Sample(SS_Linear, uv).r; float3 vsPosition = VSPositionFromDepth(expDepth, uv); float3 wsPosition = mul(float4(vsPosition, 1), SQ_InvView).xyz; float3 V = normalize(-vsPosition); diff --git a/D3D11Engine/Shaders/PS_DS_PointLight.hlsl b/D3D11Engine/Shaders/PS_DS_PointLight.hlsl index 9c7f3495..584e5087 100644 --- a/D3D11Engine/Shaders/PS_DS_PointLight.hlsl +++ b/D3D11Engine/Shaders/PS_DS_PointLight.hlsl @@ -100,10 +100,10 @@ float4 PSMain( PS_INPUT Input ) : SV_TARGET float4 diffuse = TX_Diffuse.Sample(SS_Linear, uv); // Get the second GBuffer - float4 gb2 = TX_Nrm.Sample(SS_Linear, uv); + float2 gb2 = TX_Nrm.Sample(SS_Linear, uv).xy; - // Decode the view-space normal back - float3 normal = normalize(gb2.xyz); + // Decode the view-space normal from octahedral R16G16_SNORM + float3 normal = DecodeNormalGBuffer(gb2); // Get specular parameters float4 gb3 = TX_SI_SP.Sample(SS_Linear, uv); diff --git a/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl b/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl index 8c80e67e..f2108a41 100644 --- a/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl +++ b/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl @@ -144,10 +144,10 @@ float4 PSMain( PS_INPUT Input ) : SV_TARGET float4 diffuse = TX_Diffuse.Sample(SS_Linear, uv); // Get the second GBuffer - float4 gb2 = TX_Nrm.Sample(SS_Linear, uv); + float2 gb2 = 
TX_Nrm.Sample(SS_Linear, uv).xy; - // Decode the view-space normal back - float3 normal = normalize(gb2.xyz); + // Decode the view-space normal from octahedral R16G16_SNORM + float3 normal = DecodeNormalGBuffer(gb2); // Get specular parameters float4 gb3 = TX_SI_SP.Sample(SS_Linear, uv); diff --git a/D3D11Engine/Shaders/PS_Diffuse.hlsl b/D3D11Engine/Shaders/PS_Diffuse.hlsl index 9af0b79c..83f1a6c9 100644 --- a/D3D11Engine/Shaders/PS_Diffuse.hlsl +++ b/D3D11Engine/Shaders/PS_Diffuse.hlsl @@ -120,8 +120,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET //output.vDiffuse = float4(Input.vTexcoord2, 0, 1); //output.vDiffuse = float4(Input.vNormalVS, 1); - output.vNrm.xyz = nrm; - output.vNrm.w = 1.0f; + output.vNrm = EncodeNormalGBuffer(nrm); output.vSI_SP.x = MI_SpecularIntensity * fx.r; output.vSI_SP.y = MI_SpecularPower * fx.g; diff --git a/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl b/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl new file mode 100644 index 00000000..98abd01d --- /dev/null +++ b/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl @@ -0,0 +1,141 @@ +//-------------------------------------------------------------------------------------- +// Atlas pixel shader for static vobs +// Samples from Texture2DArray using (u, v, slice) from vertex shader +//-------------------------------------------------------------------------------------- +#include +#include +#include +#include + +cbuffer MI_MaterialInfo : register( b2 ) +{ + float MI_SpecularIntensity; + float MI_SpecularPower; + float MI_NormalmapStrength; + float MI_ParallaxOcclusionStrength; + + float4 MI_Color; +} + +cbuffer DIST_Distance : register( b3 ) +{ + float DIST_DrawDistance; + float DIST_LodBias; + float2 DIST_Pad; +} + +//-------------------------------------------------------------------------------------- +// Textures and Samplers +//-------------------------------------------------------------------------------------- +SamplerState SS_Linear : register( s0 ); +SamplerState SS_samMirror : 
register( s1 ); +Texture2DArray TX_AtlasArray : register( t0 ); +Texture2D TX_Texture1 : register( t1 ); +Texture2D TX_Texture2 : register( t2 ); +TextureCube TX_ReflectionCube : register( t4 ); + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct PS_INPUT +{ + float3 vTexcoord3D : TEXCOORD0; // (rawU, rawV, slice) + float2 vTexcoord2 : TEXCOORD1; + float4 vDiffuse : TEXCOORD2; + float4 vAtlasRect : TEXCOORD3; // (uStart, vStart, uEnd, vEnd) + float3 vNormalVS : TEXCOORD4; + float3 vViewPosition : TEXCOORD5; + float4 vCurrClipPos : TEXCOORD6; + float4 vPrevClipPos : TEXCOORD7; + float4 vPosition : SV_POSITION; +}; + +// Calculate screen-space velocity from clip positions +float2 CalculateVelocity(float4 currClipPos, float4 prevClipPos) +{ + if (currClipPos.w == 0.0 || prevClipPos.w == 0.0) + return float2(0, 0); + + float2 currNDC = currClipPos.xy / currClipPos.w; + float2 prevNDC = prevClipPos.xy / prevClipPos.w; + + float2 currUV = float2(currNDC.x * 0.5 + 0.5, 1.0 - (currNDC.y * 0.5 + 0.5)); + float2 prevUV = float2(prevNDC.x * 0.5 + 0.5, 1.0 - (prevNDC.y * 0.5 + 0.5)); + + return prevUV - currUV; +} + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +#if ALPHATEST_SHADOWS == 1 +void PSMain( PS_INPUT Input ) +{ + // Per-pixel atlas UV remapping: avoids frac() interpolation collapse in the VS + // (frac(1.0)=0.0 in VS causes entire [0,1] UV range to collapse to a single texel). + // SampleGrad uses gradients from the raw (pre-frac) UVs so MIP selection stays correct + // even at UV wrap boundaries where frac() would create huge derivative discontinuities. 
+ float2 rawUV = Input.vTexcoord3D.xy; + float slice = Input.vTexcoord3D.z; + float2 atlasScale = Input.vAtlasRect.zw - Input.vAtlasRect.xy; // (uEnd-uStart, vEnd-vStart) + + // SampleGrad ignores sampler MipLODBias, so we manually apply the LOD bias + // (needed for FSR upscaling to produce sharp textures at lower resolutions) + float biasFactor = exp2(DIST_LodBias); + float2 gradX = ddx(rawUV) * atlasScale * biasFactor; + float2 gradY = ddy(rawUV) * atlasScale * biasFactor; + float2 atlasUV = Input.vAtlasRect.xy + frac(rawUV) * atlasScale; + + float4 color = TX_AtlasArray.SampleGrad(SS_Linear, float3(atlasUV, slice), gradX, gradY); + + ClipDistanceEffect(length(Input.vViewPosition), DIST_DrawDistance, color.r * 2 - 1, 500.0f); + DoAlphaTest(color.a); +} + +// Disable regular shader: in this preprocessor branch it is compiled under the name PSMainDISABLED +DEFERRED_PS_OUTPUT PSMainDISABLED( PS_INPUT Input ) : SV_TARGET +#else +DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET +#endif +{ + DEFERRED_PS_OUTPUT output; + output.vReactiveMask = 0.0f; + + // Per-pixel atlas UV remapping: avoids frac() interpolation collapse in the VS + // (frac(1.0)=0.0 in VS causes entire [0,1] UV range to collapse to a single texel). + // SampleGrad uses gradients from the raw (pre-frac) UVs so MIP selection stays correct + // even at UV wrap boundaries where frac() would create huge derivative discontinuities. 
+ float2 rawUV = Input.vTexcoord3D.xy; + float slice = Input.vTexcoord3D.z; + float2 atlasScale = Input.vAtlasRect.zw - Input.vAtlasRect.xy; // (uEnd-uStart, vEnd-vStart) + + // SampleGrad ignores sampler MipLODBias, so we manually apply the LOD bias + // (needed for FSR upscaling to produce sharp textures at lower resolutions) + float biasFactor = exp2(DIST_LodBias); + float2 gradX = ddx(rawUV) * atlasScale * biasFactor; + float2 gradY = ddy(rawUV) * atlasScale * biasFactor; + float2 atlasUV = Input.vAtlasRect.xy + frac(rawUV) * atlasScale; + + float4 color = TX_AtlasArray.SampleGrad(SS_Linear, float3(atlasUV, slice), gradX, gradY); + +#if ALPHATEST == 1 + ClipDistanceEffect(length(Input.vViewPosition), DIST_DrawDistance, color.r * 2 - 1, 500.0f); + DoAlphaTest(color.a); + output.vReactiveMask = 0.1f; +#endif + + float3 nrm = normalize(Input.vNormalVS); + + // No FX map is sampled in this function: fx stays 1.0, so the specular terms below are written unscaled + float4 fx = 1.0f; + + output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); + + output.vNrm = EncodeNormalGBuffer(nrm); + + output.vSI_SP.x = MI_SpecularIntensity * fx.r; + output.vSI_SP.y = MI_SpecularPower * fx.g; + + output.vVelocity = CalculateVelocity(Input.vCurrClipPos, Input.vPrevClipPos); + + return output; +} diff --git a/D3D11Engine/Shaders/PS_Grass.hlsl b/D3D11Engine/Shaders/PS_Grass.hlsl index 5f12bed1..a77ebbeb 100644 --- a/D3D11Engine/Shaders/PS_Grass.hlsl +++ b/D3D11Engine/Shaders/PS_Grass.hlsl @@ -65,8 +65,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET DEFERRED_PS_OUTPUT output; output.vDiffuse = float4(color.rgb, 1); - output.vNrm.xyz = normalize(Input.vNormalVS); - output.vNrm.w = 1.0f; + output.vNrm = EncodeNormalGBuffer(normalize(Input.vNormalVS)); output.vSI_SP.xy = 0; diff --git a/D3D11Engine/Shaders/PS_PFX_GodRayMask.hlsl b/D3D11Engine/Shaders/PS_PFX_GodRayMask.hlsl index 864876a6..a1cbdb48 100644 --- a/D3D11Engine/Shaders/PS_PFX_GodRayMask.hlsl +++ b/D3D11Engine/Shaders/PS_PFX_GodRayMask.hlsl @@ -8,7 +8,7 @@ SamplerState SS_Linear : register( s0 ); SamplerState 
SS_samMirror : register( s1 ); Texture2D TX_Texture0 : register( t0 ); -Texture2D TX_Texture1 : register( t1 ); +Texture2D TX_Depth : register( t1 ); //-------------------------------------------------------------------------------------- // Input / Output structures @@ -26,9 +26,10 @@ struct PS_INPUT float4 PSMain( PS_INPUT Input ) : SV_TARGET { float4 color = TX_Texture0.Sample(SS_Linear, Input.vTexcoord); - float4 gb2 = TX_Texture1.Sample(SS_Linear, Input.vTexcoord); - if(gb2.w < 0.001f) + // Sky detection via depth buffer (reversed-Z: sky has depth == 0.0) + // NOTE(review): depth is read through SS_Linear; if that sampler filters, texels on a sky/geometry edge blend to a non-zero depth and fail the test below - confirm this is intended + float depth = TX_Depth.Sample(SS_Linear, Input.vTexcoord).r; + if(depth < 0.00001f) return color; return float4(0,0,0,0); diff --git a/D3D11Engine/Shaders/PS_World.hlsl b/D3D11Engine/Shaders/PS_World.hlsl index 661c3785..8311f0d8 100644 --- a/D3D11Engine/Shaders/PS_World.hlsl +++ b/D3D11Engine/Shaders/PS_World.hlsl @@ -85,8 +85,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET DEFERRED_PS_OUTPUT output; output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); - output.vNrm.xyz = normalize(Input.vNormalVS); - output.vNrm.w = 1.0f; + output.vNrm = EncodeNormalGBuffer(normalize(Input.vNormalVS)); output.vSI_SP.x = MI_SpecularIntensity; output.vSI_SP.y = MI_SpecularPower; diff --git a/D3D11Engine/Shaders/PS_WorldAtlas.hlsl b/D3D11Engine/Shaders/PS_WorldAtlas.hlsl new file mode 100644 index 00000000..42aed68f --- /dev/null +++ b/D3D11Engine/Shaders/PS_WorldAtlas.hlsl @@ -0,0 +1,171 @@ +//-------------------------------------------------------------------------------------- +// World mesh pixel shader for atlas indirect draw path +// Samples diffuse, normal and FX maps from separate Texture2DArray atlases. 
+// Flags bits: 1 = HAS_NORMAL, 2 = HAS_FX, 4 = ALPHA_TEST +//-------------------------------------------------------------------------------------- +#include +#include +#include +#include + +cbuffer MI_MaterialInfo : register( b2 ) +{ + float MI_SpecularIntensity; + float MI_SpecularPower; + float MI_NormalmapStrength; + float MI_ParallaxOcclusionStrength; + + float4 MI_Color; +} + +cbuffer DIST_Distance : register( b3 ) +{ + float DIST_DrawDistance; + float DIST_LodBias; + float2 DIST_Pad; +} + +//-------------------------------------------------------------------------------------- +// Textures and Samplers +//-------------------------------------------------------------------------------------- +SamplerState SS_Linear : register( s0 ); +SamplerState SS_samMirror : register( s1 ); +Texture2DArray TX_AtlasDiffuse : register( t0 ); +Texture2DArray TX_AtlasNormal : register( t1 ); +Texture2DArray TX_AtlasFx : register( t2 ); +TextureCube TX_ReflectionCube : register( t4 ); + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct PS_INPUT +{ + float3 vTexcoord3D : TEXCOORD0; // (rawU, rawV, diffuseSlice) + float2 vTexcoord2 : TEXCOORD1; + float4 vDiffuse : TEXCOORD2; + float4 vAtlasRect : TEXCOORD3; // diffuse atlas rect + float3 vNormalVS : TEXCOORD4; + float3 vViewPosition : TEXCOORD5; + float4 vCurrClipPos : TEXCOORD6; + float4 vPrevClipPos : TEXCOORD7; + float3 vNormalAtlas3D : TEXCOORD8; // (rawU, rawV, normalSlice) + float4 vNormalAtlasRect : TEXCOORD9; // normal atlas rect + float3 vFxAtlas3D : TEXCOORD10; // (rawU, rawV, fxSlice) + nointerpolation uint vFlags : TEXCOORD11; + float4 vFxAtlasRect : TEXCOORD12; // fx atlas rect + float4 vPosition : SV_POSITION; +}; + +// Calculate screen-space velocity from clip positions +// Returns prevUV - currUV in [0,1] screen UV units (Y flipped), or (0,0) when either w is zero +float2 CalculateVelocity(float4 currClipPos, float4 prevClipPos) +{ + if 
(currClipPos.w == 0.0 || prevClipPos.w == 0.0) + return float2(0, 0); + + float2 currNDC = currClipPos.xy / currClipPos.w; + float2 prevNDC = prevClipPos.xy / prevClipPos.w; + + float2 currUV = float2(currNDC.x * 0.5 + 0.5, 1.0 - (currNDC.y * 0.5 + 0.5)); + float2 prevUV = float2(prevNDC.x * 0.5 + 0.5, 1.0 - (prevNDC.y * 0.5 + 0.5)); + + return prevUV - currUV; +} + +// Helper: sample from an atlas Texture2DArray with correct mip via SampleGrad + frac() +// Clamps the final atlas UV inside the entry boundary, scaled by the mip level +// so that at higher mips the border grows to prevent bilinear bleed into neighbors. +// rawUVSlice = (rawU, rawV, slice), atlasRect = (uStart, vStart, uEnd, vEnd), lodBias = mip bias in LOD units +float4 SampleAtlas(Texture2DArray atlas, SamplerState ss, float3 rawUVSlice, float4 atlasRect, float lodBias) +{ + float2 rawUV = rawUVSlice.xy; + float slice = rawUVSlice.z; + float2 scale = atlasRect.zw - atlasRect.xy; + // SampleGrad ignores sampler MipLODBias, so we manually apply the LOD bias + // (needed for FSR upscaling to produce sharp textures at lower resolutions) + float biasFactor = exp2(lodBias); + float2 gradX = ddx(rawUV) * scale * biasFactor; + float2 gradY = ddy(rawUV) * scale * biasFactor; + + // Query actual atlas dimensions instead of assuming a fixed size + float atlasW, atlasH, atlasSlices; + atlas.GetDimensions(atlasW, atlasH, atlasSlices); + float2 atlasSize = float2(atlasW, atlasH); + + // Compute approximate mip level from gradients + // (each gradient is (du, dv): u scales by the atlas width and v by the atlas height) + float2 dxTex = gradX * atlasSize; + float2 dyTex = gradY * atlasSize; + float maxSq = max(dot(dxTex, dxTex), dot(dyTex, dyTex)); + float mipLevel = max(0.0, 0.5 * log2(maxSq)); + + // Scale the half-texel border by 2^mip so it covers the filter footprint at that level. + // Cap it at half the entry size so the clamp range below can never invert for small entries / coarse mips. + float2 border = min((0.5 / atlasSize) * exp2(ceil(mipLevel)), 0.5 * scale); + + float2 atlasUV = atlasRect.xy + frac(rawUV) * scale; + atlasUV = clamp(atlasUV, atlasRect.xy + border, atlasRect.zw - border); + return atlas.SampleGrad(ss, float3(atlasUV, slice), gradX, gradY); +} + +//-------------------------------------------------------------------------------------- +// Pixel 
Shader +//-------------------------------------------------------------------------------------- +// Writes the deferred G-buffer outputs; Input.vFlags selects alpha test (4), normal map (1) and FX map (2) +DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET +{ + DEFERRED_PS_OUTPUT output; + output.vReactiveMask = 0.0f; + + // --- Diffuse --- + float4 color = SampleAtlas(TX_AtlasDiffuse, SS_Linear, Input.vTexcoord3D, Input.vAtlasRect, DIST_LodBias); + + // Alpha test + if (Input.vFlags & 4u) + { + ClipDistanceEffect(length(Input.vViewPosition), DIST_DrawDistance, color.r * 2 - 1, 500.0f); + DoAlphaTest(color.a); + output.vReactiveMask = 0.1f; + } + + // --- Normal mapping --- + float3 nrm; + if (Input.vFlags & 1u) + { + // Fetch the normal-map texel through SampleAtlas so it gets the same mip selection and + // border clamp as the diffuse / FX atlases (no bilinear bleed from neighbouring entries). + float4 nrmTexel = SampleAtlas(TX_AtlasNormal, SS_Linear, Input.vNormalAtlas3D, Input.vNormalAtlasRect, DIST_LodBias); + + // Build the tangent frame from the raw (pre-frac) mesh UVs: they stay continuous across + // tile wraps, unlike the remapped atlas UVs. + nrm = perturb_normal_from_sample( + Input.vNormalVS, + Input.vViewPosition, + nrmTexel.xyz, + Input.vNormalAtlas3D.xy, + MI_NormalmapStrength); + } + else + { + nrm = normalize(Input.vNormalVS); + } + + // --- FX map --- + float4 fx = 1.0f; + if (Input.vFlags & 2u) + { + fx = SampleAtlas(TX_AtlasFx, SS_Linear, Input.vFxAtlas3D, Input.vFxAtlasRect, DIST_LodBias); + } + + output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); + + output.vNrm = EncodeNormalGBuffer(nrm); + + output.vSI_SP.x = MI_SpecularIntensity * fx.r; + output.vSI_SP.y = MI_SpecularPower * fx.g; + + output.vVelocity = CalculateVelocity(Input.vCurrClipPos, Input.vPrevClipPos); + + return output; +} diff --git a/D3D11Engine/Shaders/Toolbox.h b/D3D11Engine/Shaders/Toolbox.h index 5a22a510..40dd8f00 100644 --- a/D3D11Engine/Shaders/Toolbox.h +++ 
b/D3D11Engine/Shaders/Toolbox.h @@ -40,4 +40,16 @@ float3 perturb_normal( float3 N, float3 V, Texture2D normalmap, float2 texcoord, float3x3 TBN = cotangent_frame( N, -V, texcoord ); return normalize( mul(transpose(TBN), nrmmap) ); +} + +// Shared tail of the atlas normal-mapping helpers: turns an already fetched normal-map texel (nrmSample, 0..1 encoded) +// into a perturbed normal in the space of N. frameUV is the coordinate handed to cotangent_frame; pass the raw +// (pre-frac) mesh UVs so the frame never sees the jump a frac()-wrapped atlas UV makes at a tile seam +// (assumes cotangent_frame differentiates its UV argument - verify against its definition). +float3 perturb_normal_from_sample( float3 N, float3 V, float3 nrmSample, float2 frameUV, float normalmapDepth = 1.0f) +{ + float3 nrmmap = nrmSample * 2 - 1; + nrmmap.xy *= -1.0f; + nrmmap.xy *= normalmapDepth; + nrmmap = normalize(nrmmap); + + float3x3 TBN = cotangent_frame( N, -V, frameUV ); + return normalize( mul(transpose(TBN), nrmmap) ); +} + +// Atlas variant: samples from a Texture2DArray using SampleGrad for correct mip selection. +// Kept for existing callers; the tangent frame is still built from uvSlice.xy exactly as before. +float3 perturb_normal_from_grad( float3 N, float3 V, Texture2DArray normalmap, float3 uvSlice, float2 gradX, float2 gradY, SamplerState samplerState, float normalmapDepth = 1.0f) +{ + float3 nrmSample = normalmap.SampleGrad(samplerState, uvSlice, gradX, gradY).xyz; + return perturb_normal_from_sample( N, V, nrmSample, uvSlice.xy, normalmapDepth ); } \ No newline at end of file diff --git a/D3D11Engine/Shaders/VS_ExInstancedObjIndirect.hlsl b/D3D11Engine/Shaders/VS_ExInstancedObjIndirect.hlsl new file mode 100644 index 00000000..0b063d50 --- /dev/null +++ b/D3D11Engine/Shaders/VS_ExInstancedObjIndirect.hlsl @@ -0,0 +1,217 @@ +//-------------------------------------------------------------------------------------- +// Instanced vertex shader for the indirect draw path; per-instance data is read from a StructuredBuffer<VobInstanceInfo> +//-------------------------------------------------------------------------------------- + +#include "Globals_VS_ExConstants.h" + +cbuffer Matrices_PerFrame : register( b0 ) +{ + VS_ExConstantBuffer_PerFrame frame; +}; + +cbuffer WindParams : register(b1) +{ + float3 windDir; + float globalTime; + float minHeight; + float maxHeight; + float2 padding0; + float3 playerPos; + float padding1; +}; + +StructuredBuffer<VobInstanceInfo> instances : register(t1); + +// Unpack DWORD color (R8G8B8A8_UNORM layout) to float4 +float4 UnpackColor(uint packed) +{ + return float4( + float(packed & 0xFF) / 255.0, + float((packed >> 8) & 0xFF) / 255.0, + float((packed >> 16) & 0xFF) / 255.0, + float((packed >> 24) & 0xFF) / 255.0 + ); +} + +//-------------------------------------------------------------------------------------- +// 
Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float3 vPosition : POSITION; + float3 vNormal : NORMAL; + float2 vTex1 : TEXCOORD0; + float2 vTex2 : TEXCOORD1; + float4 vDiffuse : DIFFUSE; + + // The Input Assembler automatically adds StartInstanceLocation to this fetch! + uint instanceID : INSTANCE_ID; +}; + +struct VS_OUTPUT +{ + float2 vTexcoord : TEXCOORD0; + float2 vTexcoord2 : TEXCOORD1; + float4 vDiffuse : TEXCOORD2; + float3 vNormalVS : TEXCOORD4; + float3 vViewPosition : TEXCOORD5; + float4 vCurrClipPos : TEXCOORD6; // Current clip position for velocity + float4 vPrevClipPos : TEXCOORD7; // Previous clip position for velocity + + float4 vPosition : SV_POSITION; +}; + +#if SHD_WIND + +// Vertices below trunkStiffness (fraction of the normalized height) stay completely still, like a tree trunk +static const float trunkStiffness = 0.12f; +static const float phaseVariation = 0.40f; +static const float windStrengMult = 16.0f; // original engine uses [0.1 -> 5] range, we use higher values in formulas +static const float PI_2 = 6.283185; // 2 * PI + +float GetInstancePhaseOffset(float4x4 objMatrix) +{ + // Random seed by object's matrix + // Combine object matrix and maxHeight for more stable randomness + float seed = dot(objMatrix._11_22_33, float3(12.9898, 78.233, 53.539)) + maxHeight; + return frac(sin(seed) * 43758.5453) * phaseVariation; +} + +// Returns the wind displacement VSMain adds to the vertex position; it is zero for vertices below trunkStiffness +float3 ApplyTreeWind(float3 vertexPos, float3 direction, float heightNorm, float timeSec, float4x4 instMatrix, float windStrength) +{ + // Calculate if vertex should be affected (1 if heightNorm >= trunkStiffness, 0 otherwise) + float shouldAffect = saturate(sign(heightNorm - trunkStiffness + 0.0001f)); + + float instancePhase = GetInstancePhaseOffset(instMatrix) * PI_2; + + // Smooth height factor with more natural falloff + float adjustedHeight = saturate((heightNorm - trunkStiffness) / (1.0 - trunkStiffness)) * shouldAffect; + float heightFactor = 
pow(adjustedHeight, 2.6f); + + // Main wave + float mainWave = sin(timeSec * 1.0 + heightNorm * 3.0 + instancePhase) * 0.8; + + // Second wave + float secondaryWave = cos(timeSec * 0.7 + heightNorm * 5.0 + instancePhase * 1.5) * 0.80; + + // Inertia + float inertiaEffect = sin(timeSec * 0.3 + heightNorm * 8.0) * 0.1; + + // Height amplitude + float topSmoothing = smoothstep(0.7, 0.9, adjustedHeight); + + // Combine waves + float combinedWave = (mainWave + secondaryWave * 0.5) * (1.0 - topSmoothing * 0.3) + inertiaEffect * topSmoothing; + + // Chaotic leaf motion (scaled by topSmoothing, so it only acts near the top) + float leafTurbulence = (sin(timeSec * 4.0 + vertexPos.x * 15.0) + + cos(timeSec * 3.7 + vertexPos.z * 12.0)) * 0.05 * topSmoothing; + + // Final offset + float3 windOffset = direction * windStrength * windStrengMult * + (combinedWave + leafTurbulence) * heightFactor; + + return windOffset; +} +#endif + +#if SHD_INFLUENCE + +// HERO AFFECTS CONST +static const float heroAffectRange = 100.0f; +static const float heroAffectStrength = 38.0f; + +// Returns the local-space offset that pushes a vertex away from the player +float3 CalculatePlayerInfluence( + float3 playerPos, + float3 vertexLocalPos, + float minHeight, + float maxHeight, + float4x4 instWorldMatrix +) +{ + float heightRange = max(maxHeight - minHeight, 0.001); + float vertexHeightNorm = saturate((vertexLocalPos.y - minHeight) / heightRange); + + // 15% of object height check + float heightMask = smoothstep(0.14, 0.16, vertexHeightNorm); + + float3 vertexWorldPos = mul(float4(vertexLocalPos, 1.0), instWorldMatrix).xyz; + float3 toVertex = vertexWorldPos - playerPos; + + // Fall back to straight up when the vertex sits (almost) on the player. A select is used instead of + // lerp() because lerp(a, normalize(0), 0) still evaluates 0 * NaN and returns NaN. + float3 displaceDirWorld = (length(toVertex) >= 0.001) ? normalize(toVertex) : float3(0, 1, 0); + + float distanceXZ = length(toVertex.xz); + float distanceFactor = exp(-(distanceXZ*distanceXZ)/(1.8*heroAffectRange*heroAffectRange)); + + float influence = distanceFactor * vertexHeightNorm * heightMask; + + float randomOffset = frac(sin(dot(vertexLocalPos.xz, float2(12.9898, 78.233))) * 43758.5453); + influence *= 0.9 + 0.1 * randomOffset; + + // World -> local for a direction is the transposed 3x3, i.e. mul(M, v); mul(v, M) is the local -> world + // transform and would rotate the direction a second time once VSMain applies the world matrix. + float3 
displaceDirLocal = normalize(mul((float3x3)instWorldMatrix, displaceDirWorld)); + return displaceDirLocal * heroAffectStrength * influence; +} +#endif + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +VS_OUTPUT VSMain( VS_INPUT Input ) +{ + VS_OUTPUT Output; + + // Base vertex position (local) + float3 position = Input.vPosition; + VobInstanceInfo instance = instances[Input.instanceID]; + +#if SHD_INFLUENCE + + if (instance.canBeAffectedByPlayer > 0) + { + // HERO MOVING BUSHES SHADER + position += CalculatePlayerInfluence(playerPos, position, minHeight, maxHeight, instance.world); + } +#endif + +#if SHD_WIND + + if (instance.windStrenth > 0) + { + // WIND SHADER + // Protect 0 height + float heightRange = max(maxHeight - minHeight, 0.001); + float vertexHeightNorm = saturate((Input.vPosition.y - minHeight) / heightRange); + + // Apply wind + position += ApplyTreeWind( + Input.vPosition, + normalize(windDir), + vertexHeightNorm, + globalTime, + instance.world, + instance.windStrenth + ); + } +#endif + + // Common processing for both cases + float3 worldPos = mul(float4(position, 1.0), instance.world).xyz; + + // Calculate previous world position for motion vectors + float3 prevWorldPos = mul(float4(position, 1.0), instance.prevWorld).xyz; + + Output.vPosition = mul(float4(worldPos, 1.0), frame.M_ViewProj); + Output.vTexcoord = Input.vTex1; + Output.vTexcoord2 = Input.vTex2; + Output.vDiffuse = UnpackColor(instance.color); + Output.vNormalVS = mul(Input.vNormal, mul((float3x3)instance.world, (float3x3)frame.M_View)); + Output.vViewPosition = mul(float4(worldPos, 1.0), frame.M_View).xyz; + + // Store clip positions for velocity calculation in pixel shader + // Use UNJITTERED matrices for correct velocity (jitter would cause incorrect motion) + Output.vCurrClipPos = mul(float4(worldPos, 1.0), frame.M_UnjitteredViewProj); + 
Output.vPrevClipPos = mul(float4(prevWorldPos, 1.0), frame.M_PrevViewProj); + + return Output; +} + diff --git a/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl b/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl new file mode 100644 index 00000000..1623642a --- /dev/null +++ b/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl @@ -0,0 +1,215 @@ +//-------------------------------------------------------------------------------------- +// Instanced vertex shader for atlas indirect draw path +// Uses StructuredBuffer<VobInstanceInfoAtlas> for per-instance data including atlas UV rect +//-------------------------------------------------------------------------------------- + +#include "Globals_VS_ExConstants.h" + +cbuffer Matrices_PerFrame : register( b0 ) +{ + VS_ExConstantBuffer_PerFrame frame; +}; + +cbuffer WindParams : register(b1) +{ + float3 windDir; + float globalTime; + float minHeight; + float maxHeight; + float2 padding0; + float3 playerPos; + float padding1; +}; + +struct VobInstanceInfoAtlas { + float4x4 world; + float4x4 prevWorld; + uint color; + float windStrength; + float canBeAffectedByPlayer; + int slice; + float uStart; + float vStart; + float uEnd; + float vEnd; + uint globalSourceIndex; + float minHeight; + float maxHeight; +}; + +StructuredBuffer<VobInstanceInfoAtlas> instances : register(t1); + +// Unpack DWORD color (R8G8B8A8_UNORM layout) to float4 +float4 UnpackColor(uint packed) +{ + return float4( + float(packed & 0xFF) / 255.0, + float((packed >> 8) & 0xFF) / 255.0, + float((packed >> 16) & 0xFF) / 255.0, + float((packed >> 24) & 0xFF) / 255.0 + ); +} + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float3 vPosition : POSITION; + float3 vNormal : NORMAL; + float2 vTex1 : TEXCOORD0; + float2 vTex2 : TEXCOORD1; + float4 vDiffuse : DIFFUSE; + + // The Input Assembler automatically 
adds StartInstanceLocation to this fetch! + uint instanceID : INSTANCE_REMAP_INDEX; +}; + +struct VS_OUTPUT +{ + float3 vTexcoord3D : TEXCOORD0; // (rawU, rawV, slice) — raw UVs passed to PS for per-pixel atlas remap + float2 vTexcoord2 : TEXCOORD1; + float4 vDiffuse : TEXCOORD2; + float4 vAtlasRect : TEXCOORD3; // (uStart, vStart, uEnd, vEnd) — atlas sub-rect for PS remap + float3 vNormalVS : TEXCOORD4; + float3 vViewPosition : TEXCOORD5; + float4 vCurrClipPos : TEXCOORD6; + float4 vPrevClipPos : TEXCOORD7; + + float4 vPosition : SV_POSITION; +}; + +#if SHD_WIND + +// Vertices below trunkStiffness (fraction of the normalized height) stay completely still, like a tree trunk +static const float trunkStiffness = 0.12f; +static const float phaseVariation = 0.40f; +static const float windStrengMult = 16.0f; +static const float PI_2 = 6.283185; // 2 * PI + +// Per-instance pseudo-random phase in [0, phaseVariation), seeded from the matrix diagonal and maxH +float GetInstancePhaseOffset(float4x4 objMatrix, float maxH) +{ + float seed = dot(objMatrix._11_22_33, float3(12.9898, 78.233, 53.539)) + maxH; + return frac(sin(seed) * 43758.5453) * phaseVariation; +} + +// Returns the wind displacement VSMain adds to the vertex position; it is zero for vertices below trunkStiffness +float3 ApplyTreeWind(float3 vertexPos, float3 direction, float heightNorm, float timeSec, float4x4 instMatrix, float windStrength, float maxH) +{ + float shouldAffect = saturate(sign(heightNorm - trunkStiffness + 0.0001f)); + + float instancePhase = GetInstancePhaseOffset(instMatrix, maxH) * PI_2; + + float adjustedHeight = saturate((heightNorm - trunkStiffness) / (1.0 - trunkStiffness)) * shouldAffect; + float heightFactor = pow(adjustedHeight, 2.6f); + + float mainWave = sin(timeSec * 1.0 + heightNorm * 3.0 + instancePhase) * 0.8; + float secondaryWave = cos(timeSec * 0.7 + heightNorm * 5.0 + instancePhase * 1.5) * 0.80; + float inertiaEffect = sin(timeSec * 0.3 + heightNorm * 8.0) * 0.1; + + float topSmoothing = smoothstep(0.7, 0.9, adjustedHeight); + float combinedWave = (mainWave + secondaryWave * 0.5) * (1.0 - topSmoothing * 0.3) + inertiaEffect * topSmoothing; + + float leafTurbulence = (sin(timeSec * 4.0 + vertexPos.x * 15.0) + + cos(timeSec * 3.7 + 
vertexPos.z * 12.0)) * 0.05 * topSmoothing; + + float3 windOffset = direction * windStrength * windStrengMult * + (combinedWave + leafTurbulence) * heightFactor; + + return windOffset; +} +#endif + +#if SHD_INFLUENCE + +static const float heroAffectRange = 100.0f; +static const float heroAffectStrength = 38.0f; + +// Returns the local-space offset that pushes a vertex away from the player +float3 CalculatePlayerInfluence( + float3 playerPos, + float3 vertexLocalPos, + float minHeight, + float maxHeight, + float4x4 instWorldMatrix +) +{ + float heightRange = max(maxHeight - minHeight, 0.001); + float vertexHeightNorm = saturate((vertexLocalPos.y - minHeight) / heightRange); + + float heightMask = smoothstep(0.14, 0.16, vertexHeightNorm); + + float3 vertexWorldPos = mul(float4(vertexLocalPos, 1.0), instWorldMatrix).xyz; + float3 toVertex = vertexWorldPos - playerPos; + + // Fall back to straight up when the vertex sits (almost) on the player. A select is used instead of + // lerp() because lerp(a, normalize(0), 0) still evaluates 0 * NaN and returns NaN. + float3 displaceDirWorld = (length(toVertex) >= 0.001) ? normalize(toVertex) : float3(0, 1, 0); + + float distanceXZ = length(toVertex.xz); + float distanceFactor = exp(-(distanceXZ*distanceXZ)/(1.8*heroAffectRange*heroAffectRange)); + + float influence = distanceFactor * vertexHeightNorm * heightMask; + + float randomOffset = frac(sin(dot(vertexLocalPos.xz, float2(12.9898, 78.233))) * 43758.5453); + influence *= 0.9 + 0.1 * randomOffset; + + // World -> local for a direction is the transposed 3x3, i.e. mul(M, v); mul(v, M) is the local -> world + // transform and would rotate the direction a second time once VSMain applies the world matrix. + float3 displaceDirLocal = normalize(mul((float3x3)instWorldMatrix, displaceDirWorld)); + return displaceDirLocal * heroAffectStrength * influence; +} +#endif + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +VS_OUTPUT VSMain( VS_INPUT Input ) +{ + VS_OUTPUT Output; + + float3 position = Input.vPosition; + VobInstanceInfoAtlas inst = instances[Input.instanceID]; + +#if SHD_INFLUENCE + + if (inst.canBeAffectedByPlayer > 0) + { + position += CalculatePlayerInfluence(playerPos, position, inst.minHeight, inst.maxHeight, inst.world); + } +#endif + +#if SHD_WIND + + if (inst.windStrength > 
0) + { + float heightRange = max(inst.maxHeight - inst.minHeight, 0.001); + float vertexHeightNorm = saturate((Input.vPosition.y - inst.minHeight) / heightRange); + + position += ApplyTreeWind( + Input.vPosition, + normalize(windDir), + vertexHeightNorm, + globalTime, + inst.world, + inst.windStrength, + inst.maxHeight + ); + } +#endif + + // World-space transform + float3 worldPos = mul(float4(position, 1.0), inst.world).xyz; + float3 prevWorldPos = mul(float4(position, 1.0), inst.prevWorld).xyz; + + Output.vPosition = mul(float4(worldPos, 1.0), frame.M_ViewProj); + + // Pass raw UVs + slice to PS; atlas remapping done per-pixel to avoid frac() interpolation artifacts + Output.vTexcoord3D = float3(Input.vTex1, (float)inst.slice); + Output.vAtlasRect = float4(inst.uStart, inst.vStart, inst.uEnd, inst.vEnd); + + Output.vTexcoord2 = Input.vTex2; + Output.vDiffuse = UnpackColor(inst.color); + Output.vNormalVS = mul(Input.vNormal, mul((float3x3)inst.world, (float3x3)frame.M_View)); + Output.vViewPosition = mul(float4(worldPos, 1.0), frame.M_View).xyz; + + // Motion vectors (unjittered) + Output.vCurrClipPos = mul(float4(worldPos, 1.0), frame.M_UnjitteredViewProj); + Output.vPrevClipPos = mul(float4(prevWorldPos, 1.0), frame.M_PrevViewProj); + + return Output; +} diff --git a/D3D11Engine/Shaders/VS_ExWorldAtlas.hlsl b/D3D11Engine/Shaders/VS_ExWorldAtlas.hlsl new file mode 100644 index 00000000..3fd6df7f --- /dev/null +++ b/D3D11Engine/Shaders/VS_ExWorldAtlas.hlsl @@ -0,0 +1,98 @@ +//-------------------------------------------------------------------------------------- +// World mesh vertex shader for atlas indirect draw path +// Reads per-submesh atlas descriptors from a StructuredBuffer. +// The submesh index comes from the instance ID buffer + StartInstanceLocation. 
+//-------------------------------------------------------------------------------------- + +#include "Globals_VS_ExConstants.h" + +cbuffer Matrices_PerFrame : register( b0 ) +{ + VS_ExConstantBuffer_PerFrame frame; +}; + +struct WorldMeshSubmeshGPUData +{ + int diffuseSlice; + float dUStart, dVStart, dUEnd, dVEnd; + int normalSlice; + float nUStart, nVStart, nUEnd, nVEnd; + int fxSlice; + float fUStart, fVStart, fUEnd, fVEnd; + uint flags; +}; + +StructuredBuffer<WorldMeshSubmeshGPUData> submeshData : register( t1 ); + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float3 vPosition : POSITION; + float3 vNormal : NORMAL; + float2 vTex1 : TEXCOORD0; + float2 vTex2 : TEXCOORD1; + float4 vDiffuse : DIFFUSE; + + // StartInstanceLocation in the MDI args offsets this so it equals the submesh index + uint submeshIdx : INSTANCE_REMAP_INDEX; +}; + +struct VS_OUTPUT +{ + float3 vTexcoord3D : TEXCOORD0; // (rawU, rawV, diffuseSlice) + float2 vTexcoord2 : TEXCOORD1; + float4 vDiffuse : TEXCOORD2; + float4 vAtlasRect : TEXCOORD3; // diffuse (uStart, vStart, uEnd, vEnd) + float3 vNormalVS : TEXCOORD4; + float3 vViewPosition : TEXCOORD5; + float4 vCurrClipPos : TEXCOORD6; + float4 vPrevClipPos : TEXCOORD7; + float3 vNormalAtlas3D : TEXCOORD8; // (rawU, rawV, normalSlice) + float4 vNormalAtlasRect : TEXCOORD9; // normal (uStart, vStart, uEnd, vEnd) + float3 vFxAtlas3D : TEXCOORD10; // (rawU, rawV, fxSlice) + nointerpolation uint vFlags : TEXCOORD11; // material flags + float4 vFxAtlasRect : TEXCOORD12; // fx (uStart, vStart, uEnd, vEnd) + float4 vPosition : SV_POSITION; +}; + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +VS_OUTPUT VSMain( VS_INPUT Input ) +{ + VS_OUTPUT 
Output; + + WorldMeshSubmeshGPUData sm = submeshData[Input.submeshIdx]; + + // World mesh vertices are already in world space (M_World = Identity) + float3 positionWorld = Input.vPosition; + + Output.vPosition = mul( float4(positionWorld, 1), frame.M_ViewProj ); + + // Pass raw UVs + slice — PS does frac() and atlas remap per-pixel + Output.vTexcoord3D = float3( Input.vTex1, (float)sm.diffuseSlice ); + Output.vAtlasRect = float4( sm.dUStart, sm.dVStart, sm.dUEnd, sm.dVEnd ); + + Output.vTexcoord2 = Input.vTex2; + Output.vDiffuse = Input.vDiffuse; + Output.vNormalVS = mul( Input.vNormal, (float3x3)frame.M_View ); + Output.vViewPosition = mul( float4(positionWorld, 1), frame.M_View ).xyz; + + // Normal map atlas coords + Output.vNormalAtlas3D = float3( Input.vTex1, (float)sm.normalSlice ); + Output.vNormalAtlasRect = float4( sm.nUStart, sm.nVStart, sm.nUEnd, sm.nVEnd ); + + // FX map atlas coords + Output.vFxAtlas3D = float3( Input.vTex1, (float)sm.fxSlice ); + Output.vFxAtlasRect = float4( sm.fUStart, sm.fVStart, sm.fUEnd, sm.fVEnd ); + + Output.vFlags = sm.flags; + + // Motion vectors — the world mesh is static, so the same world position is used for both frames; only the view-projection matrix differs + Output.vCurrClipPos = mul( float4(positionWorld, 1.0), frame.M_UnjitteredViewProj ); + Output.vPrevClipPos = mul( float4(positionWorld, 1.0), frame.M_PrevViewProj ); + + return Output; +} diff --git a/D3D11Engine/Toolbox.cpp b/D3D11Engine/Toolbox.cpp index 0b0fca05..dc504b17 100644 --- a/D3D11Engine/Toolbox.cpp +++ b/D3D11Engine/Toolbox.cpp @@ -150,6 +150,25 @@ namespace Toolbox { return _mm_cvtss_f32( _mm_rcp_ss( _mm_rsqrt_ss( _mm_set_ss( dx * dx + dz * dz ) ) ) ); } + /** Computes the squared distance of a point to an AABB given by its min/max corners (0 if the point is inside) */ + float ComputePointAABBDistanceSq( const XMFLOAT3& p, const XMFLOAT3& min, const XMFLOAT3& max ) { + float dx = std::max( std::max( min.x - p.x, 0.0f ), p.x - max.x ); + float dy = std::max( std::max( min.y - p.y, 0.0f ), p.y - max.y ); + float dz = std::max( std::max( min.z - p.z, 0.0f ), p.z - max.z ); + + return (dx * dx) + (dy * dy) + (dz * dz); + } + + /** Computes the squared distance of a point to a center/extents bounding box (0 if the point is inside) */ + float 
ComputePointAABBDistanceSq( const XMFLOAT3& p, const DirectX::BoundingBox& box ) { + // 1. Get absolute distance from point to the center of the box + // 2. Subtract the box extents to get the distance to the edge + // 3. Clamp to 0 if the point is inside the box bounds along that axis + float dx = std::max( 0.0f, std::abs( p.x - box.Center.x ) - box.Extents.x ); + float dy = std::max( 0.0f, std::abs( p.y - box.Center.y ) - box.Extents.y ); + float dz = std::max( 0.0f, std::abs( p.z - box.Center.z ) - box.Extents.z ); + + return (dx * dx) + (dy * dy) + (dz * dz); + } + /** Computes the Normal of a triangle */ FXMVECTOR ComputeNormal( const XMFLOAT3& v0, const XMFLOAT3& v1, const XMFLOAT3& v2 ) { FXMVECTOR Normal = XMVector3Normalize( XMVector3Cross( (XMLoadFloat3( &v1 ) - XMLoadFloat3( &v0 )), (XMLoadFloat3( &v2 ) - XMLoadFloat3( &v0 )) ) ); diff --git a/D3D11Engine/Toolbox.h b/D3D11Engine/Toolbox.h index 371dc8b7..2b06c1c6 100644 --- a/D3D11Engine/Toolbox.h +++ b/D3D11Engine/Toolbox.h @@ -6,6 +6,7 @@ #include #include "Types.h" +#include /** Misc. 
tools */ enum zTCam_ClipType; @@ -171,6 +172,10 @@ namespace Toolbox { /** Computes the distance of a point to an AABB */ float ComputePointAABBDistance( const XMFLOAT3& p, const XMFLOAT3& min, const XMFLOAT3& max ); + /** Computes the squared distance of a point to an AABB given by its min/max corners (0 if the point is inside) */ + float ComputePointAABBDistanceSq(const XMFLOAT3& p, const XMFLOAT3& min, const XMFLOAT3& max); + + /** Computes the squared distance of a point to a DirectX::BoundingBox (0 if the point is inside) */ + float ComputePointAABBDistanceSq(const XMFLOAT3& p, const DirectX::BoundingBox& box); + /** Returns whether the given file exists */ bool FileExists( const std::string& file ); diff --git a/D3D11Engine/VobCulling.cpp b/D3D11Engine/VobCulling.cpp new file mode 100644 index 00000000..44a6ea59 --- /dev/null +++ b/D3D11Engine/VobCulling.cpp @@ -0,0 +1,155 @@ +#include "VobCulling.h" +#include +#include "ConstantBufferStructs.h" +#include "WorldObjects.h" +#include "zCModel.h" +#include "zCMaterial.h" +#include + +using namespace DirectX; + +// Frustum-culls the batched AABBs 8 at a time and fills outRenderQueue (cleared first) with one item per +// visible, non-null instance. Slot j of batches[i] maps to instances[i * 8 + j]; a box is rejected when it lies +// entirely on the positive side of any of the 6 planes (x, y, z = normal, w = distance term). +void VobCulling::CullAndGatherStaticVOBs_AVX2( + const std::vector<AABB_SoA_Batch8>& batches, + const std::vector<VobInfo*>& instances, + const DirectX::XMFLOAT4 planes[6], + std::vector<StaticVobRenderItem>& outRenderQueue ) +{ + outRenderQueue.clear(); + // Pre-reserve to avoid reallocations + outRenderQueue.reserve( instances.size() * 2 ); + + // Batch slots past the end of `instances` (padding in the last batch) must never be dereferenced + const size_t instanceCount = instances.size(); + + const __m256 abs_mask = _mm256_castsi256_ps( _mm256_set1_epi32( 0x7FFFFFFF ) ); + const __m256 zero = _mm256_setzero_ps(); + + struct alignas(32) SIMDPlane { + __m256 nx, ny, nz, d; + __m256 abs_nx, abs_ny, abs_nz; + }; + + SIMDPlane splanes[6]; + for ( int p = 0; p < 6; ++p ) { + splanes[p].nx = _mm256_set1_ps( planes[p].x ); + splanes[p].ny = _mm256_set1_ps( planes[p].y ); + splanes[p].nz = _mm256_set1_ps( planes[p].z ); + splanes[p].d = _mm256_set1_ps( planes[p].w ); + + splanes[p].abs_nx = _mm256_and_ps( splanes[p].nx, abs_mask ); + splanes[p].abs_ny = _mm256_and_ps( splanes[p].ny, abs_mask ); + splanes[p].abs_nz = _mm256_and_ps( splanes[p].nz, abs_mask ); + } + + for ( size_t i = 0; i < batches.size(); ++i ) { + const AABB_SoA_Batch8& batch = batches[i]; + + __m256 cx = 
_mm256_load_ps( batch.cx ); + __m256 cy = _mm256_load_ps( batch.cy ); + __m256 cz = _mm256_load_ps( batch.cz ); + __m256 ex = _mm256_load_ps( batch.ex ); + __m256 ey = _mm256_load_ps( batch.ey ); + __m256 ez = _mm256_load_ps( batch.ez ); + + // Start with all 8 lanes marked visible; every plane clears the lanes it rejects + __m256 v_mask = _mm256_castsi256_ps( _mm256_set1_epi32( 0xFFFFFFFF ) ); + + for ( int p = 0; p < 6; ++p ) { + __m256 nx = splanes[p].nx; + __m256 ny = splanes[p].ny; + __m256 nz = splanes[p].nz; + __m256 d = splanes[p].d; + + __m256 abs_nx = splanes[p].abs_nx; + __m256 abs_ny = splanes[p].abs_ny; + __m256 abs_nz = splanes[p].abs_nz; + + __m256 r = _mm256_mul_ps( ex, abs_nx ); + r = _mm256_fmadd_ps( ey, abs_ny, r ); + r = _mm256_fmadd_ps( ez, abs_nz, r ); + + __m256 dist = _mm256_fmadd_ps( cx, nx, d ); + dist = _mm256_fmadd_ps( cy, ny, dist ); + dist = _mm256_fmadd_ps( cz, nz, dist ); + + __m256 outside = _mm256_cmp_ps( _mm256_sub_ps( dist, r ), zero, _CMP_GT_OQ ); + v_mask = _mm256_andnot_ps( outside, v_mask ); + } + + uint32_t mask = _mm256_movemask_ps( v_mask ); + + // INSTANT SKIP: If mask is 0, all 8 items are outside the frustum. 
+ if ( mask == 0 ) continue; + + // BIT SCAN: Extract visible items efficiently + while ( mask != 0 ) { + // Find the index of the lowest set bit (0 to 7) + uint32_t bitIndex = _tzcnt_u32( mask ); + + // Calculate actual instance slot (kept as size_t so the bounds check happens before narrowing) + const size_t slot = (i * 8) + bitIndex; + + // Push to dense render queue; padding slots and empty entries are skipped + if ( slot < instanceCount && instances[slot] ) { + outRenderQueue.push_back( { + static_cast<uint32_t>( slot ), + reinterpret_cast<MeshVisualInfo*>(instances[slot]->VisualInfo), + } ); + } + + // Clear the lowest set bit so we can find the next one + // e.g., 010100 -> 010000 + mask &= (mask - 1); + } + } +} + +// Scalar implementation of the same test and gather as the AVX2 path (used when __AVX2__ is not defined, and for verification) +void VobCulling::CullAndGatherStaticVOBs_DirectXMath( + const std::vector<AABB_SoA_Batch8>& batches, + const std::vector<VobInfo*>& instances, + const XMFLOAT4 planes[6], + std::vector<StaticVobRenderItem>& outRenderQueue ) +{ + outRenderQueue.clear(); + // Pre-reserve to avoid reallocations + outRenderQueue.reserve( instances.size() * 2 ); + + for ( size_t i = 0; i < batches.size(); ++i ) { + const AABB_SoA_Batch8& batch = batches[i]; + + // Process each of the 8 AABBs in this batch + for ( int j = 0; j < 8; ++j ) { + XMFLOAT3 center( batch.cx[j], batch.cy[j], batch.cz[j] ); + XMFLOAT3 extents( batch.ex[j], batch.ey[j], batch.ez[j] ); + + bool visible = true; + + // Test against all 6 frustum planes + for ( int p = 0; p < 6; ++p ) { + // Get absolute values of plane normal components + float abs_nx = std::abs( planes[p].x ); + float abs_ny = std::abs( planes[p].y ); + float abs_nz = std::abs( planes[p].z ); + + // Calculate the radius (projected extent along plane normal) + float r = extents.x * abs_nx + extents.y * abs_ny + extents.z * abs_nz; + + // Calculate distance from center to plane + float dist = center.x * planes[p].x + center.y * planes[p].y + center.z * planes[p].z + planes[p].w; + + // If dist - r > 0, box is completely outside this plane + if ( dist - r > 0.0f ) { + visible = false; + break; + } + } + + if ( visible ) { + const size_t slot = i * 8 + j; + + // Padding slots past the end of `instances` and empty entries are skipped + if ( slot < instances.size() && instances[slot] ) { + 
outRenderQueue.push_back( { + static_cast<uint32_t>( slot ), + reinterpret_cast<MeshVisualInfo*>(instances[slot]->VisualInfo), + } ); + } + } + } + } +} diff --git a/D3D11Engine/VobCulling.h b/D3D11Engine/VobCulling.h new file mode 100644 index 00000000..4eb9bae9 --- /dev/null +++ b/D3D11Engine/VobCulling.h @@ -0,0 +1,52 @@ +#pragma once + +#include +#include +#include +#include +#include + +// Bounding data for 8 AABBs in structure-of-arrays form (centers cx/cy/cz, extents ex/ey/ez), aligned for AVX loads +struct alignas(32) AABB_SoA_Batch8 { + float cx[8], cy[8], cz[8]; + float ex[8], ey[8], ez[8]; +}; + +// The dense render item (output of the culler) +struct StaticVobRenderItem { + uint32_t instanceIndex; // index into the VobInfo* list that was passed to the culler + struct MeshVisualInfo* mvi; +}; + +struct VobInfo; + +class VobCulling +{ +public: + // Fills outRenderQueue with the visible entries of `instances`; slot j of batches[i] maps to instances[i * 8 + j] + static void CullAndGatherStaticVOBs( + const std::vector<AABB_SoA_Batch8>& batches, + const std::vector<VobInfo*>& instances, + const DirectX::XMFLOAT4 planes[6], + std::vector<StaticVobRenderItem>& outRenderQueue ) { +#ifdef __AVX2__ + CullAndGatherStaticVOBs_AVX2( batches, instances, planes, outRenderQueue ); +#else + CullAndGatherStaticVOBs_DirectXMath( batches, instances, planes, outRenderQueue ); +#endif + } + +private: + static void CullAndGatherStaticVOBs_AVX2( + const std::vector<AABB_SoA_Batch8>& batches, + const std::vector<VobInfo*>& instances, + const DirectX::XMFLOAT4 planes[6], + std::vector<StaticVobRenderItem>& outRenderQueue ); + + // Scalar fallback used when __AVX2__ is not defined; also serves for debugging/verification + static void CullAndGatherStaticVOBs_DirectXMath( + const std::vector<AABB_SoA_Batch8>& batches, + const std::vector<VobInfo*>& instances, + const DirectX::XMFLOAT4 planes[6], + std::vector<StaticVobRenderItem>& outRenderQueue ); +}; + diff --git a/D3D11Engine/packages.config b/D3D11Engine/packages.config index 44327afd..6c5d5b62 100644 --- a/D3D11Engine/packages.config +++ b/D3D11Engine/packages.config @@ -2,6 +2,7 @@ + \ No newline at end of file