diff --git a/src/Backends/DirectX12/include/litefx/backends/dx12.hpp b/src/Backends/DirectX12/include/litefx/backends/dx12.hpp index d15a73d9..366ab4e7 100644 --- a/src/Backends/DirectX12/include/litefx/backends/dx12.hpp +++ b/src/Backends/DirectX12/include/litefx/backends/dx12.hpp @@ -1000,9 +1000,12 @@ namespace LiteFX::Rendering::Backends { public: using base_type = CommandBuffer; using base_type::dispatch; + using base_type::dispatchIndirect; using base_type::dispatchMesh; using base_type::draw; + using base_type::drawIndirect; using base_type::drawIndexed; + using base_type::drawIndexedIndirect; using base_type::barrier; using base_type::transfer; using base_type::generateMipMaps; @@ -1143,13 +1146,37 @@ namespace LiteFX::Rendering::Backends { void dispatch(const Vector3u& threadCount) const noexcept override; /// - void dispatchMesh (const Vector3u& threadCount) const noexcept override; + void dispatchIndirect(const IDirectX12Buffer& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept override; + + /// + void dispatchIndirect(const IDirectX12Buffer& batchBuffer, const IDirectX12Buffer& countBuffer, UInt64 offset = 0, UInt64 countOffset = 0, UInt32 maxBatches = std::numeric_limits::max()) const noexcept; + + /// + void dispatchMesh(const Vector3u& threadCount) const noexcept override; + + /// + void dispatchMeshIndirect(const IDirectX12Buffer& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept override; + + /// + void dispatchMeshIndirect(const IDirectX12Buffer& batchBuffer, const IDirectX12Buffer& countBuffer, UInt64 offset = 0, UInt64 countOffset = 0, UInt32 maxBatches = std::numeric_limits::max()) const noexcept override; /// void draw(UInt32 vertices, UInt32 instances = 1, UInt32 firstVertex = 0, UInt32 firstInstance = 0) const noexcept override; + /// + void drawIndirect(const IDirectX12Buffer& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept override; + + /// + void drawIndirect(const IDirectX12Buffer& 
batchBuffer, const IDirectX12Buffer& countBuffer, UInt64 offset = 0, UInt64 countOffset = 0, UInt32 maxBatches = std::numeric_limits::max()) const noexcept override; + /// void drawIndexed(UInt32 indices, UInt32 instances = 1, UInt32 firstIndex = 0, Int32 vertexOffset = 0, UInt32 firstInstance = 0) const noexcept override; + + /// + void drawIndexedIndirect(const IDirectX12Buffer& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept override; + + /// + void drawIndexedIndirect(const IDirectX12Buffer& batchBuffer, const IDirectX12Buffer& countBuffer, UInt64 offset = 0, UInt64 countOffset = 0, UInt32 maxBatches = std::numeric_limits::max()) const noexcept override; /// void pushConstants(const DirectX12PushConstantsLayout& layout, const void* const memory) const noexcept override; @@ -1984,6 +2011,15 @@ namespace LiteFX::Rendering::Backends { /// virtual DirectX12ComputePipeline& blitPipeline() const noexcept; + /// + /// Returns the command signatures for indirect dispatch and draw calls. + /// + /// The command signature used to execute indirect dispatches. + /// The command signature used to execute indirect mesh shader dispatches. + /// The command signature used to execute indirect non-indexed draw calls. + /// The command signature used to execute indirect indexed draw calls. + virtual void indirectDrawSignatures(ComPtr& dispatchSignature, ComPtr& dispatchMeshSignature, ComPtr& drawSignature, ComPtr& drawIndexedSignature) const noexcept; + // GraphicsDevice interface. public: /// diff --git a/src/Backends/DirectX12/include/litefx/backends/dx12_builders.hpp b/src/Backends/DirectX12/include/litefx/backends/dx12_builders.hpp index 960a2ace..84436880 100644 --- a/src/Backends/DirectX12/include/litefx/backends/dx12_builders.hpp +++ b/src/Backends/DirectX12/include/litefx/backends/dx12_builders.hpp @@ -158,8 +158,7 @@ namespace LiteFX::Rendering::Backends { /// The parent pipeline layout builder. /// The space the descriptor set is bound to. 
/// The shader stages, the descriptor set is accessible from. - /// Ignored for DirectX 12, but required for compatibility. - constexpr inline explicit DirectX12DescriptorSetLayoutBuilder(DirectX12PipelineLayoutBuilder& parent, UInt32 space = 0, ShaderStage stages = ShaderStage::Any, UInt32 maxUnboundedArraySize = 0); + constexpr inline explicit DirectX12DescriptorSetLayoutBuilder(DirectX12PipelineLayoutBuilder& parent, UInt32 space = 0, ShaderStage stages = ShaderStage::Any); DirectX12DescriptorSetLayoutBuilder(const DirectX12DescriptorSetLayoutBuilder&) = delete; DirectX12DescriptorSetLayoutBuilder(DirectX12DescriptorSetLayoutBuilder&&) = delete; constexpr inline virtual ~DirectX12DescriptorSetLayoutBuilder() noexcept; @@ -236,8 +235,7 @@ namespace LiteFX::Rendering::Backends { /// /// The space, the descriptor set is bound to. /// The stages, the descriptor set will be accessible from. - /// Unused for this backend. - constexpr inline DirectX12DescriptorSetLayoutBuilder descriptorSet(UInt32 space = 0, ShaderStage stages = ShaderStage::Any, UInt32 maxUnboundedArraySize = 0); + constexpr inline DirectX12DescriptorSetLayoutBuilder descriptorSet(UInt32 space = 0, ShaderStage stages = ShaderStage::Any); /// /// Builds a new push constants layout for the pipeline layout. diff --git a/src/Backends/DirectX12/src/blas.cpp b/src/Backends/DirectX12/src/blas.cpp index a7bd27b1..a10c3a98 100644 --- a/src/Backends/DirectX12/src/blas.cpp +++ b/src/Backends/DirectX12/src/blas.cpp @@ -103,11 +103,11 @@ class DirectX12BottomLevelAccelerationStructure::DirectX12BottomLevelAcceleratio }; // Transition the buffer into UAV state. We create manual barriers here, as the special access flag is only required in this specific situation. - CD3DX12_BUFFER_BARRIER preBarrier[2] = { + CD3DX12_BUFFER_BARRIER preBarrier[1] = { CD3DX12_BUFFER_BARRIER(afterCopy ? 
D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE : D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE, D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO, D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE, D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ, std::as_const(*m_buffer).handle().Get()), - CD3DX12_BUFFER_BARRIER(D3D12_BARRIER_SYNC_NONE, D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO, D3D12_BARRIER_ACCESS_NO_ACCESS, D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, std::as_const(*m_postBuildBuffer).handle().Get()), + //CD3DX12_BUFFER_BARRIER(D3D12_BARRIER_SYNC_NONE, D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO, D3D12_BARRIER_ACCESS_NO_ACCESS, D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, std::as_const(*m_postBuildBuffer).handle().Get()), }; - auto preBarrierGroup = CD3DX12_BARRIER_GROUP(2, preBarrier); + auto preBarrierGroup = CD3DX12_BARRIER_GROUP(1, preBarrier); commandBuffer.handle()->Barrier(1, &preBarrierGroup); // Emit the diff --git a/src/Backends/DirectX12/src/buffer.cpp b/src/Backends/DirectX12/src/buffer.cpp index c156b748..ac5f54ca 100644 --- a/src/Backends/DirectX12/src/buffer.cpp +++ b/src/Backends/DirectX12/src/buffer.cpp @@ -91,16 +91,10 @@ void DirectX12Buffer::map(const void* const data, size_t size, UInt32 element) if (element >= m_impl->m_elements) [[unlikely]] throw ArgumentOutOfRangeException("element", 0u, m_impl->m_elements, element, "The element {0} is out of range. 
The buffer only contains {1} elements.", element, m_impl->m_elements); - size_t alignedSize = size; - size_t alignment = this->elementAlignment(); - - if (alignment > 0) - alignedSize = (size + alignment - 1) & ~(alignment - 1); - - D3D12_RANGE mappedRange = {}; + D3D12_RANGE mappedRange = { }; char* buffer; raiseIfFailed(this->handle()->Map(0, &mappedRange, reinterpret_cast(&buffer)), "Unable to map buffer memory."); - auto result = ::memcpy_s(reinterpret_cast(buffer + (element * alignedSize)), alignedSize, data, size); + auto result = ::memcpy_s(reinterpret_cast(buffer + (element * this->alignedElementSize())), this->size() - (element * this->alignedElementSize()), data, size); // Capacity is the bytes left after the element offset, so overruns are rejected (assumes size() spans all elements - TODO confirm). this->handle()->Unmap(0, nullptr); if (result != 0) [[unlikely]] @@ -117,18 +111,12 @@ void DirectX12Buffer::map(void* data, size_t size, UInt32 element, bool write) if (element >= m_impl->m_elements) [[unlikely]] throw ArgumentOutOfRangeException("element", 0u, m_impl->m_elements, element, "The element {0} is out of range. The buffer only contains {1} elements.", element, m_impl->m_elements); - size_t alignedSize = size; - size_t alignment = this->elementAlignment(); - - if (alignment > 0) - alignedSize = (size + alignment - 1) & ~(alignment - 1); - - D3D12_RANGE mappedRange = {}; + D3D12_RANGE mappedRange = { }; char* buffer; raiseIfFailed(this->handle()->Map(0, &mappedRange, reinterpret_cast(&buffer)), "Unable to map buffer memory."); auto result = write ? 
- ::memcpy_s(reinterpret_cast(buffer + (element * alignedSize)), alignedSize, data, size) : - ::memcpy_s(data, size, reinterpret_cast(buffer + (element * alignedSize)), alignedSize); + ::memcpy_s(reinterpret_cast(buffer + (element * this->alignedElementSize())), this->size() - (element * this->alignedElementSize()), data, size) : // Write capacity is the bytes left after the element offset (assumes size() spans all elements - TODO confirm). + ::memcpy_s(data, size, reinterpret_cast(buffer + (element * this->alignedElementSize())), size); this->handle()->Unmap(0, nullptr); diff --git a/src/Backends/DirectX12/src/command_buffer.cpp b/src/Backends/DirectX12/src/command_buffer.cpp index 4a09a7bc..2761727a 100644 --- a/src/Backends/DirectX12/src/command_buffer.cpp +++ b/src/Backends/DirectX12/src/command_buffer.cpp @@ -16,6 +16,7 @@ class DirectX12CommandBuffer::DirectX12CommandBufferImpl : public Implement> m_sharedResources; const DirectX12PipelineState* m_lastPipeline = nullptr; + ComPtr m_dispatchSignature, m_drawSignature, m_drawIndexedSignature, m_dispatchMeshSignature; public: DirectX12CommandBufferImpl(DirectX12CommandBuffer* parent, const DirectX12Queue& queue) : @@ -26,6 +27,9 @@ class DirectX12CommandBuffer::DirectX12CommandBufferImpl : public Implement initialize(bool begin, bool primary) { + // Store the command signatures for indirect drawing. + m_queue.device().indirectDrawSignatures(m_dispatchSignature, m_dispatchMeshSignature, m_drawSignature, m_drawIndexedSignature); + // Create a command allocator. D3D12_COMMAND_LIST_TYPE type; @@ -275,7 +279,7 @@ void DirectX12CommandBuffer::generateMipMaps(IDirectX12Image& image) noexcept this->bind(*samplerBindings, pipeline); // Transition the texture into a read/write state. 
- DirectX12Barrier startBarrier(PipelineStage::None, PipelineStage::Compute); + DirectX12Barrier startBarrier(PipelineStage::All, PipelineStage::Compute); startBarrier.transition(image, ResourceAccess::None, ResourceAccess::ShaderReadWrite, ImageLayout::Undefined, ImageLayout::ReadWrite); this->barrier(startBarrier); auto resource = resourceBindings.begin(); @@ -496,21 +500,61 @@ void DirectX12CommandBuffer::dispatch(const Vector3u& threadCount) const noexcep this->handle()->Dispatch(threadCount.x(), threadCount.y(), threadCount.z()); } +void DirectX12CommandBuffer::dispatchIndirect(const IDirectX12Buffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept +{ + this->handle()->ExecuteIndirect(m_impl->m_dispatchSignature.Get(), batchCount, batchBuffer.handle().Get(), offset, nullptr, 0); +} + +void DirectX12CommandBuffer::dispatchIndirect(const IDirectX12Buffer& batchBuffer, const IDirectX12Buffer& countBuffer, UInt64 offset, UInt64 countOffset, UInt32 maxBatches) const noexcept +{ + this->handle()->ExecuteIndirect(m_impl->m_dispatchSignature.Get(), std::min(maxBatches, static_cast(batchBuffer.alignedElementSize() / sizeof(IndirectDispatchBatch))), batchBuffer.handle().Get(), offset, countBuffer.handle().Get(), countOffset); +} + void DirectX12CommandBuffer::dispatchMesh(const Vector3u& threadCount) const noexcept { this->handle()->DispatchMesh(threadCount.x(), threadCount.y(), threadCount.z()); } +void DirectX12CommandBuffer::dispatchMeshIndirect(const IDirectX12Buffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept +{ + this->handle()->ExecuteIndirect(m_impl->m_dispatchMeshSignature.Get(), batchCount, batchBuffer.handle().Get(), offset, nullptr, 0); +} + +void DirectX12CommandBuffer::dispatchMeshIndirect(const IDirectX12Buffer& batchBuffer, const IDirectX12Buffer& countBuffer, UInt64 offset, UInt64 countOffset, UInt32 maxBatches) const noexcept +{ + this->handle()->ExecuteIndirect(m_impl->m_dispatchMeshSignature.Get(), 
std::min(maxBatches, static_cast(batchBuffer.alignedElementSize() / sizeof(IndirectDispatchBatch))), batchBuffer.handle().Get(), offset, countBuffer.handle().Get(), countOffset); +} + void DirectX12CommandBuffer::draw(UInt32 vertices, UInt32 instances, UInt32 firstVertex, UInt32 firstInstance) const noexcept { this->handle()->DrawInstanced(vertices, instances, firstVertex, firstInstance); } +void DirectX12CommandBuffer::drawIndirect(const IDirectX12Buffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept +{ + this->handle()->ExecuteIndirect(m_impl->m_drawSignature.Get(), batchCount, batchBuffer.handle().Get(), offset, nullptr, 0); +} + +void DirectX12CommandBuffer::drawIndirect(const IDirectX12Buffer& batchBuffer, const IDirectX12Buffer& countBuffer, UInt64 offset, UInt64 countOffset, UInt32 maxBatches) const noexcept +{ + this->handle()->ExecuteIndirect(m_impl->m_drawSignature.Get(), std::min(maxBatches, static_cast(batchBuffer.alignedElementSize() / sizeof(IndirectBatch))), batchBuffer.handle().Get(), offset, countBuffer.handle().Get(), countOffset); +} + void DirectX12CommandBuffer::drawIndexed(UInt32 indices, UInt32 instances, UInt32 firstIndex, Int32 vertexOffset, UInt32 firstInstance) const noexcept { this->handle()->DrawIndexedInstanced(indices, instances, firstIndex, vertexOffset, firstInstance); } +void DirectX12CommandBuffer::drawIndexedIndirect(const IDirectX12Buffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept +{ + this->handle()->ExecuteIndirect(m_impl->m_drawIndexedSignature.Get(), batchCount, batchBuffer.handle().Get(), offset, nullptr, 0); +} + +void DirectX12CommandBuffer::drawIndexedIndirect(const IDirectX12Buffer& batchBuffer, const IDirectX12Buffer& countBuffer, UInt64 offset, UInt64 countOffset, UInt32 maxBatches) const noexcept +{ + this->handle()->ExecuteIndirect(m_impl->m_drawIndexedSignature.Get(), std::min(maxBatches, static_cast(batchBuffer.alignedElementSize() / sizeof(IndirectIndexedBatch))), 
batchBuffer.handle().Get(), offset, countBuffer.handle().Get(), countOffset); +} + void DirectX12CommandBuffer::pushConstants(const DirectX12PushConstantsLayout& layout, const void* const memory) const noexcept { std::ranges::for_each(layout.ranges(), [this, &layout, &memory](const DirectX12PushConstantsRange* range) { this->handle()->SetGraphicsRoot32BitConstants(range->rootParameterIndex(), range->size() / 4, reinterpret_cast(memory) + range->offset(), 0); }); diff --git a/src/Backends/DirectX12/src/descriptor_set.cpp b/src/Backends/DirectX12/src/descriptor_set.cpp index 59af525f..fe8ba7f9 100644 --- a/src/Backends/DirectX12/src/descriptor_set.cpp +++ b/src/Backends/DirectX12/src/descriptor_set.cpp @@ -141,7 +141,6 @@ void DirectX12DescriptorSet::update(UInt32 binding, const IDirectX12Buffer& buff } case DescriptorType::RWStructuredBuffer: { - // TODO: Support counter in AppendStructuredBuffer. for (UInt32 i(0); i < elementCount; ++i) { D3D12_UNORDERED_ACCESS_VIEW_DESC bufferView = { @@ -160,11 +159,12 @@ void DirectX12DescriptorSet::update(UInt32 binding, const IDirectX12Buffer& buff { for (UInt32 i(0); i < elementCount; ++i) { + // NOTE: One takes 4 byte size (sizeof(DWORD)) in DXGI_FORMAT_R32_TYPELESS format, which is required for raw buffers. 
D3D12_SHADER_RESOURCE_VIEW_DESC bufferView = { .Format = DXGI_FORMAT_R32_TYPELESS, .ViewDimension = D3D12_SRV_DIMENSION_BUFFER, .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, - .Buffer = { .FirstElement = ((bufferElement + i) * buffer.alignedElementSize()) / 4, .NumElements = static_cast(buffer.alignedElementSize() / 4), .StructureByteStride = 0, .Flags = D3D12_BUFFER_SRV_FLAG_RAW } + .Buffer = { .FirstElement = (bufferElement + i) * sizeof(DWORD), .NumElements = static_cast(buffer.alignedElementSize() / sizeof(DWORD)), .StructureByteStride = 0, .Flags = D3D12_BUFFER_SRV_FLAG_RAW } }; m_impl->m_layout.device().handle()->CreateShaderResourceView(buffer.handle().Get(), &bufferView, descriptorHandle); @@ -177,10 +177,11 @@ void DirectX12DescriptorSet::update(UInt32 binding, const IDirectX12Buffer& buff { for (UInt32 i(0); i < elementCount; ++i) { + // NOTE: Individual fields in a buffer are always required to be 4 bytes wide, while alignment between elements is 16 bytes (D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT). D3D12_UNORDERED_ACCESS_VIEW_DESC bufferView = { .Format = DXGI_FORMAT_R32_TYPELESS, .ViewDimension = D3D12_UAV_DIMENSION_BUFFER, - .Buffer = { .FirstElement = ((bufferElement + i) * buffer.alignedElementSize()) / 4, .NumElements = static_cast(buffer.alignedElementSize() / 4), .StructureByteStride = 0, .CounterOffsetInBytes = 0, .Flags = D3D12_BUFFER_UAV_FLAG_RAW } + .Buffer = { .FirstElement = (bufferElement + i) * sizeof(DWORD), .NumElements = static_cast(buffer.alignedElementSize() / sizeof(DWORD)), .StructureByteStride = 0, .CounterOffsetInBytes = 0, .Flags = D3D12_BUFFER_UAV_FLAG_RAW } }; m_impl->m_layout.device().handle()->CreateUnorderedAccessView(buffer.handle().Get(), nullptr, &bufferView, descriptorHandle); @@ -197,7 +198,7 @@ void DirectX12DescriptorSet::update(UInt32 binding, const IDirectX12Buffer& buff .Format = DXGI_FORMAT_R32_TYPELESS, // TODO: Actually set the proper texel format. 
.ViewDimension = D3D12_SRV_DIMENSION_BUFFER, .Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, - .Buffer = { .FirstElement = ((bufferElement + i) * buffer.alignedElementSize()) / 4, .NumElements = static_cast(buffer.alignedElementSize() / 4), .StructureByteStride = 0, .Flags = D3D12_BUFFER_SRV_FLAG_RAW } + .Buffer = { .FirstElement = (bufferElement + i) * sizeof(DWORD), .NumElements = static_cast(buffer.alignedElementSize() / sizeof(DWORD)), .StructureByteStride = 0 } }; m_impl->m_layout.device().handle()->CreateShaderResourceView(buffer.handle().Get(), &bufferView, descriptorHandle); @@ -213,7 +214,7 @@ void DirectX12DescriptorSet::update(UInt32 binding, const IDirectX12Buffer& buff D3D12_UNORDERED_ACCESS_VIEW_DESC bufferView = { .Format = DXGI_FORMAT_R32_TYPELESS, // TODO: Actually set the proper texel format. .ViewDimension = D3D12_UAV_DIMENSION_BUFFER, - .Buffer = { .FirstElement = ((bufferElement + i) * buffer.alignedElementSize()) / 4, .NumElements = static_cast(buffer.alignedElementSize() / 4), .StructureByteStride = 0, .CounterOffsetInBytes = 0, .Flags = D3D12_BUFFER_UAV_FLAG_RAW } + .Buffer = { .FirstElement = (bufferElement + i) * sizeof(DWORD), .NumElements = static_cast(buffer.alignedElementSize() / sizeof(DWORD)), .StructureByteStride = 0, .CounterOffsetInBytes = 0 } }; m_impl->m_layout.device().handle()->CreateUnorderedAccessView(buffer.handle().Get(), nullptr, &bufferView, descriptorHandle); diff --git a/src/Backends/DirectX12/src/descriptor_set_layout.cpp b/src/Backends/DirectX12/src/descriptor_set_layout.cpp index 7c68f663..c613e7c5 100644 --- a/src/Backends/DirectX12/src/descriptor_set_layout.cpp +++ b/src/Backends/DirectX12/src/descriptor_set_layout.cpp @@ -303,10 +303,9 @@ void DirectX12DescriptorSetLayout::free(const DirectX12DescriptorSet& descriptor // Descriptor set layout builder shared interface. 
// ------------------------------------------------------------------------------------------------ -constexpr DirectX12DescriptorSetLayoutBuilder::DirectX12DescriptorSetLayoutBuilder(DirectX12PipelineLayoutBuilder& parent, UInt32 space, ShaderStage stages, UInt32 maxUnboundedArraySize) : +constexpr DirectX12DescriptorSetLayoutBuilder::DirectX12DescriptorSetLayoutBuilder(DirectX12PipelineLayoutBuilder& parent, UInt32 space, ShaderStage stages) : DescriptorSetLayoutBuilder(parent, UniquePtr(new DirectX12DescriptorSetLayout(parent.device()))) { - m_state.maxUnboundedArraySize = maxUnboundedArraySize; } constexpr DirectX12DescriptorSetLayoutBuilder::~DirectX12DescriptorSetLayoutBuilder() noexcept = default; diff --git a/src/Backends/DirectX12/src/device.cpp b/src/Backends/DirectX12/src/device.cpp index dd0f3065..2f057f1d 100644 --- a/src/Backends/DirectX12/src/device.cpp +++ b/src/Backends/DirectX12/src/device.cpp @@ -28,6 +28,7 @@ class DirectX12Device::DirectX12DeviceImpl : public Implement { ComPtr m_globalBufferHeap, m_globalSamplerHeap; mutable std::mutex m_bufferBindMutex; Array> m_bufferDescriptorFragments, m_samplerDescriptorFragments; + ComPtr m_dispatchSignature, m_drawSignature, m_drawIndexedSignature, m_dispatchMeshSignature; public: DirectX12DeviceImpl(DirectX12Device* parent, const DirectX12GraphicsAdapter& adapter, UniquePtr&& surface, const DirectX12Backend& backend, UInt32 globalBufferHeapSize, UInt32 globalSamplerHeapSize) : @@ -173,6 +174,20 @@ class DirectX12Device::DirectX12DeviceImpl : public Implement { m_globalSamplerHeap->SetName(L"Global Sampler Heap"); #endif + // Initialize command signatures for indirect drawing. 
+ D3D12_INDIRECT_ARGUMENT_DESC argumentDesc = { .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH }; + D3D12_COMMAND_SIGNATURE_DESC signatureDesc = { .ByteStride = sizeof(IndirectDispatchBatch), .NumArgumentDescs = 1, .pArgumentDescs = &argumentDesc, .NodeMask = 0x00 }; + raiseIfFailed(device->CreateCommandSignature(&signatureDesc, nullptr, IID_PPV_ARGS(&m_dispatchSignature)), "Unable to create indirect dispatch command signature."); + argumentDesc = { .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH }; + signatureDesc = { .ByteStride = sizeof(IndirectDispatchBatch), .NumArgumentDescs = 1, .pArgumentDescs = &argumentDesc, .NodeMask = 0x00 }; // Stride must match the IndirectDispatchBatch elements that dispatchMeshIndirect assumes when sizing its batches. + raiseIfFailed(device->CreateCommandSignature(&signatureDesc, nullptr, IID_PPV_ARGS(&m_dispatchMeshSignature)), "Unable to create indirect mesh shader dispatch command signature."); + argumentDesc = { .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW }; + signatureDesc = { .ByteStride = sizeof(IndirectBatch), .NumArgumentDescs = 1, .pArgumentDescs = &argumentDesc, .NodeMask = 0x00 }; + raiseIfFailed(device->CreateCommandSignature(&signatureDesc, nullptr, IID_PPV_ARGS(&m_drawSignature)), "Unable to create indirect draw command signature."); + argumentDesc = { .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED }; + signatureDesc = { .ByteStride = sizeof(IndirectIndexedBatch), .NumArgumentDescs = 1, .pArgumentDescs = &argumentDesc, .NodeMask = 0x00 }; + raiseIfFailed(device->CreateCommandSignature(&signatureDesc, nullptr, IID_PPV_ARGS(&m_drawIndexedSignature)), "Unable to create indirect indexed draw command signature."); + return device; } @@ -455,6 +470,14 @@ DirectX12ComputePipeline& DirectX12Device::blitPipeline() const noexcept return *m_impl->m_blitPipeline; } +void DirectX12Device::indirectDrawSignatures(ComPtr& dispatchSignature, ComPtr& dispatchMeshSignature, ComPtr& drawSignature, ComPtr& drawIndexedSignature) const noexcept +{ + dispatchSignature = m_impl->m_dispatchSignature; + dispatchMeshSignature = 
m_impl->m_dispatchMeshSignature; + drawSignature = m_impl->m_drawSignature; + drawIndexedSignature = m_impl->m_drawIndexedSignature; +} + #if defined(LITEFX_BUILD_DEFINE_BUILDERS) DirectX12RenderPassBuilder DirectX12Device::buildRenderPass(UInt32 commandBuffers) const { diff --git a/src/Backends/DirectX12/src/factory.cpp b/src/Backends/DirectX12/src/factory.cpp index 56e76147..11b47e79 100644 --- a/src/Backends/DirectX12/src/factory.cpp +++ b/src/Backends/DirectX12/src/factory.cpp @@ -65,7 +65,7 @@ UniquePtr DirectX12GraphicsFactory::createBuffer(const String& else if (heap == ResourceHeap::Readback && !LITEFX_FLAG_IS_SET(usage, ResourceUsage::TransferDestination)) usage |= ResourceUsage::TransferDestination; - // Constant buffers are aligned to 256 byte chunks. All other buffers can be aligned to a multiple of 4 bytes (`sizeof(DWORD)`). The actual amount of memory allocated + // Constant buffers are aligned to 256 byte chunks. All other buffers can be aligned to a multiple of 16 bytes (D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT). The actual amount of memory allocated // is then defined as the smallest multiple of 64kb, that's greater or equal to `resourceDesc.Width` below. For more info, see: // https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12device-getresourceallocationinfo#remarks. 
size_t elementAlignment = 0; @@ -75,7 +75,7 @@ UniquePtr DirectX12GraphicsFactory::createBuffer(const String& case BufferType::Uniform: elementAlignment = D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT; break; case BufferType::Vertex: case BufferType::Index: elementAlignment = 0; break; - default: elementAlignment = sizeof(DWORD); break; + default: elementAlignment = D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT; break; } D3D12_RESOURCE_DESC1 resourceDesc { }; diff --git a/src/Backends/DirectX12/src/pipeline_layout.cpp b/src/Backends/DirectX12/src/pipeline_layout.cpp index 24ae57f6..f88fe051 100644 --- a/src/Backends/DirectX12/src/pipeline_layout.cpp +++ b/src/Backends/DirectX12/src/pipeline_layout.cpp @@ -296,7 +296,7 @@ void DirectX12PipelineLayoutBuilder::build() instance->handle() = instance->m_impl->initialize(); } -constexpr DirectX12DescriptorSetLayoutBuilder DirectX12PipelineLayoutBuilder::descriptorSet(UInt32 space, ShaderStage stages, UInt32 /*maxUnboundedArraySize*/) +constexpr DirectX12DescriptorSetLayoutBuilder DirectX12PipelineLayoutBuilder::descriptorSet(UInt32 space, ShaderStage stages) { return DirectX12DescriptorSetLayoutBuilder(*this, space, stages); } diff --git a/src/Backends/DirectX12/src/tlas.cpp b/src/Backends/DirectX12/src/tlas.cpp index d532886a..2b272508 100644 --- a/src/Backends/DirectX12/src/tlas.cpp +++ b/src/Backends/DirectX12/src/tlas.cpp @@ -67,11 +67,11 @@ class DirectX12TopLevelAccelerationStructure::DirectX12TopLevelAccelerationStruc }; // Transition the buffer into UAV state. We create manual barriers here, as the special access flag is only required in this specific situation. - CD3DX12_BUFFER_BARRIER preBarrier[2] = { + CD3DX12_BUFFER_BARRIER preBarrier[1] = { CD3DX12_BUFFER_BARRIER(afterCopy ? 
D3D12_BARRIER_SYNC_COPY_RAYTRACING_ACCELERATION_STRUCTURE : D3D12_BARRIER_SYNC_BUILD_RAYTRACING_ACCELERATION_STRUCTURE, D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO, D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE, D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ, std::as_const(*m_buffer).handle().Get()), - CD3DX12_BUFFER_BARRIER(D3D12_BARRIER_SYNC_NONE, D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO, D3D12_BARRIER_ACCESS_NO_ACCESS, D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, std::as_const(*m_postBuildBuffer).handle().Get()), + //CD3DX12_BUFFER_BARRIER(D3D12_BARRIER_SYNC_NONE, D3D12_BARRIER_SYNC_EMIT_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO, D3D12_BARRIER_ACCESS_NO_ACCESS, D3D12_BARRIER_ACCESS_UNORDERED_ACCESS, std::as_const(*m_postBuildBuffer).handle().Get()), }; - auto preBarrierGroup = CD3DX12_BARRIER_GROUP(2, preBarrier); + auto preBarrierGroup = CD3DX12_BARRIER_GROUP(1, preBarrier); commandBuffer.handle()->Barrier(1, &preBarrierGroup); // Emit the diff --git a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp index e9e205f2..30c7109f 100644 --- a/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp +++ b/src/Backends/Vulkan/include/litefx/backends/vulkan.hpp @@ -638,17 +638,11 @@ namespace LiteFX::Rendering::Backends { /// /// Initializes a Vulkan descriptor set layout. /// - /// - /// If the descriptor set contains an unbounded array, it still is not truly unbounded. Instead, only maximum number of descriptors can be allocated from the descriptor set. This - /// number is defined by the device limits and depends on the descriptor type. If you need more descriptors in one array, increase the - /// parameter. Keep in mind that you may be only able to use less or smaller unbounded descriptor arrays in other descriptor sets as a result. - /// /// The parent device, the pipeline layout has been created from. 
/// The descriptor layouts of the descriptors within the descriptor set. /// The space or set id of the descriptor set. /// The shader stages, the descriptor sets are bound to. - /// The maximum number of descriptors in an unbounded array. - explicit VulkanDescriptorSetLayout(const VulkanDevice& device, Enumerable>&& descriptorLayouts, UInt32 space, ShaderStage stages, UInt32 maxUnboundedArraySize = 104857); + explicit VulkanDescriptorSetLayout(const VulkanDevice& device, Enumerable>&& descriptorLayouts, UInt32 space, ShaderStage stages); VulkanDescriptorSetLayout(VulkanDescriptorSetLayout&&) = delete; VulkanDescriptorSetLayout(const VulkanDescriptorSetLayout&) = delete; virtual ~VulkanDescriptorSetLayout() noexcept; @@ -989,9 +983,12 @@ namespace LiteFX::Rendering::Backends { public: using base_type = CommandBuffer; using base_type::dispatch; + using base_type::dispatchIndirect; using base_type::dispatchMesh; using base_type::draw; + using base_type::drawIndirect; using base_type::drawIndexed; + using base_type::drawIndexedIndirect; using base_type::barrier; using base_type::transfer; using base_type::generateMipMaps; @@ -1140,15 +1137,36 @@ namespace LiteFX::Rendering::Backends { /// void dispatch(const Vector3u& threadCount) const noexcept override; + /// + void dispatchIndirect(const IVulkanBuffer& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept override; + /// void dispatchMesh(const Vector3u& threadCount) const noexcept override; + /// + void dispatchMeshIndirect(const IVulkanBuffer& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept override; + + /// + void dispatchMeshIndirect(const IVulkanBuffer& batchBuffer, const IVulkanBuffer& countBuffer, UInt64 offset = 0, UInt64 countOffset = 0, UInt32 maxBatches = std::numeric_limits::max()) const noexcept override; + /// void draw(UInt32 vertices, UInt32 instances = 1, UInt32 firstVertex = 0, UInt32 firstInstance = 0) const noexcept override; + /// + void drawIndirect(const 
IVulkanBuffer& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept override; + + /// + void drawIndirect(const IVulkanBuffer& batchBuffer, const IVulkanBuffer& countBuffer, UInt64 offset = 0, UInt64 countOffset = 0, UInt32 maxBatches = std::numeric_limits::max()) const noexcept override; + /// void drawIndexed(UInt32 indices, UInt32 instances = 1, UInt32 firstIndex = 0, Int32 vertexOffset = 0, UInt32 firstInstance = 0) const noexcept override; + /// + void drawIndexedIndirect(const IVulkanBuffer& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept override; + + /// + void drawIndexedIndirect(const IVulkanBuffer& batchBuffer, const IVulkanBuffer& countBuffer, UInt64 offset = 0, UInt64 countOffset = 0, UInt32 maxBatches = std::numeric_limits::max()) const noexcept override; + /// void pushConstants(const VulkanPushConstantsLayout& layout, const void* const memory) const noexcept override; diff --git a/src/Backends/Vulkan/include/litefx/backends/vulkan_builders.hpp b/src/Backends/Vulkan/include/litefx/backends/vulkan_builders.hpp index b20c881d..d5a52efc 100644 --- a/src/Backends/Vulkan/include/litefx/backends/vulkan_builders.hpp +++ b/src/Backends/Vulkan/include/litefx/backends/vulkan_builders.hpp @@ -158,8 +158,7 @@ namespace LiteFX::Rendering::Backends { /// The parent pipeline layout builder. /// The space the descriptor set is bound to. /// The shader stages, the descriptor set is accessible from. - /// The maximum array size of unbounded descriptor arrays in this descriptor set. 
- constexpr inline explicit VulkanDescriptorSetLayoutBuilder(VulkanPipelineLayoutBuilder& parent, UInt32 space = 0, ShaderStage stages = ShaderStage::Any, UInt32 maxUnboundedArraySize = 0); + constexpr inline explicit VulkanDescriptorSetLayoutBuilder(VulkanPipelineLayoutBuilder& parent, UInt32 space = 0, ShaderStage stages = ShaderStage::Any); VulkanDescriptorSetLayoutBuilder(const VulkanDescriptorSetLayoutBuilder&) = delete; VulkanDescriptorSetLayoutBuilder(VulkanDescriptorSetLayoutBuilder&&) = delete; constexpr inline virtual ~VulkanDescriptorSetLayoutBuilder() noexcept; @@ -236,8 +235,7 @@ namespace LiteFX::Rendering::Backends { /// /// The space, the descriptor set is bound to. /// The stages, the descriptor set will be accessible from. - /// The maximum array size of unbounded descriptor arrays in this descriptor set. - constexpr inline VulkanDescriptorSetLayoutBuilder descriptorSet(UInt32 space = 0, ShaderStage stages = ShaderStage::Any, UInt32 maxUnboundedArraySize = 0); + constexpr inline VulkanDescriptorSetLayoutBuilder descriptorSet(UInt32 space = 0, ShaderStage stages = ShaderStage::Any); /// /// Builds a new push constants layout for the pipeline layout. diff --git a/src/Backends/Vulkan/src/buffer.cpp b/src/Backends/Vulkan/src/buffer.cpp index e80ee5c5..01c45628 100644 --- a/src/Backends/Vulkan/src/buffer.cpp +++ b/src/Backends/Vulkan/src/buffer.cpp @@ -93,15 +93,9 @@ void VulkanBuffer::map(const void* const data, size_t size, UInt32 element) if (element >= m_impl->m_elements) [[unlikely]] throw ArgumentOutOfRangeException("element", 0u, m_impl->m_elements, element, "The element {0} is out of range. The buffer only contains {1} elements.", element, m_impl->m_elements); - size_t alignedSize = size; - size_t alignment = this->elementAlignment(); - - if (alignment > 0) - alignedSize = (size + alignment - 1) & ~(alignment - 1); - char* buffer; // A pointer to the whole (aligned) buffer memory. 
raiseIfFailed(::vmaMapMemory(m_impl->m_allocator, m_impl->m_allocation, reinterpret_cast(&buffer)), "Unable to map buffer memory."); - auto result = ::memcpy_s(reinterpret_cast(buffer + (element * alignedSize)), alignedSize, data, size); + auto result = ::memcpy_s(reinterpret_cast(buffer + (element * this->alignedElementSize())), this->size(), data, size); ::vmaUnmapMemory(m_impl->m_allocator, m_impl->m_allocation); @@ -119,17 +113,11 @@ void VulkanBuffer::map(void* data, size_t size, UInt32 element, bool write) if (element >= m_impl->m_elements) [[unlikely]] throw ArgumentOutOfRangeException("element", 0u, m_impl->m_elements, element, "The element {0} is out of range. The buffer only contains {1} elements.", element, m_impl->m_elements); - size_t alignedSize = size; - size_t alignment = this->elementAlignment(); - - if (alignment > 0) - alignedSize = (size + alignment - 1) & ~(alignment - 1); - char* buffer; // A pointer to the whole (aligned) buffer memory. raiseIfFailed(::vmaMapMemory(m_impl->m_allocator, m_impl->m_allocation, reinterpret_cast(&buffer)), "Unable to map buffer memory."); auto result = write ? 
- ::memcpy_s(reinterpret_cast(buffer + (element * alignedSize)), alignedSize, data, size) : - ::memcpy_s(data, size, reinterpret_cast(buffer + (element * alignedSize)), alignedSize); + ::memcpy_s(reinterpret_cast(buffer + (element * this->alignedElementSize())), this->size(), data, size) : + ::memcpy_s(data, size, reinterpret_cast(buffer + (element * this->alignedElementSize())), size); ::vmaUnmapMemory(m_impl->m_allocator, m_impl->m_allocation); diff --git a/src/Backends/Vulkan/src/command_buffer.cpp b/src/Backends/Vulkan/src/command_buffer.cpp index 2f822eda..2af395ff 100644 --- a/src/Backends/Vulkan/src/command_buffer.cpp +++ b/src/Backends/Vulkan/src/command_buffer.cpp @@ -7,13 +7,14 @@ extern PFN_vkCmdBuildAccelerationStructuresKHR vkCmdBuildAccelerationStructures; extern PFN_vkCmdCopyAccelerationStructureKHR vkCmdCopyAccelerationStructure; extern PFN_vkDestroyAccelerationStructureKHR vkDestroyAccelerationStructure; extern PFN_vkCmdTraceRaysKHR vkCmdTraceRays; +extern PFN_vkCmdDrawMeshTasksEXT vkCmdDrawMeshTasks; +extern PFN_vkCmdDrawMeshTasksIndirectEXT vkCmdDrawMeshTasksIndirect; +extern PFN_vkCmdDrawMeshTasksIndirectCountEXT vkCmdDrawMeshTasksIndirectCount; // ------------------------------------------------------------------------------------------------ // Implementation. 
// ------------------------------------------------------------------------------------------------ -extern PFN_vkCmdDrawMeshTasksEXT vkCmdDrawMeshTasks; - class VulkanCommandBuffer::VulkanCommandBufferImpl : public Implement { public: friend class VulkanCommandBuffer; @@ -637,21 +638,56 @@ void VulkanCommandBuffer::dispatch(const Vector3u& threadCount) const noexcept ::vkCmdDispatch(this->handle(), threadCount.x(), threadCount.y(), threadCount.z()); } +void VulkanCommandBuffer::dispatchIndirect(const IVulkanBuffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept +{ + ::vkCmdDispatchIndirect(this->handle(), batchBuffer.handle(), offset); +} + void VulkanCommandBuffer::dispatchMesh(const Vector3u& threadCount) const noexcept { ::vkCmdDrawMeshTasks(this->handle(), threadCount.x(), threadCount.y(), threadCount.z()); } +void VulkanCommandBuffer::dispatchMeshIndirect(const IVulkanBuffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept +{ + ::vkCmdDrawMeshTasksIndirect(this->handle(), batchBuffer.handle(), offset, batchCount, batchBuffer.elementSize()); +} + +void VulkanCommandBuffer::dispatchMeshIndirect(const IVulkanBuffer& batchBuffer, const IVulkanBuffer& countBuffer, UInt64 offset, UInt64 countOffset, UInt32 maxBatches) const noexcept +{ + ::vkCmdDrawMeshTasksIndirectCount(this->handle(), batchBuffer.handle(), offset, countBuffer.handle(), countOffset, std::min(maxBatches, static_cast(batchBuffer.alignedElementSize() / sizeof(IndirectDispatchBatch))), sizeof(IndirectDispatchBatch)); +} + void VulkanCommandBuffer::draw(UInt32 vertices, UInt32 instances, UInt32 firstVertex, UInt32 firstInstance) const noexcept { ::vkCmdDraw(this->handle(), vertices, instances, firstVertex, firstInstance); } +void VulkanCommandBuffer::drawIndirect(const IVulkanBuffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept +{ + ::vkCmdDrawIndirect(this->handle(), batchBuffer.handle(), offset, batchCount, batchBuffer.elementSize()); +} + +void 
VulkanCommandBuffer::drawIndirect(const IVulkanBuffer& batchBuffer, const IVulkanBuffer& countBuffer, UInt64 offset, UInt64 countOffset, UInt32 maxBatches) const noexcept +{ + ::vkCmdDrawIndirectCount(this->handle(), batchBuffer.handle(), offset, countBuffer.handle(), countOffset, std::min(maxBatches, static_cast(batchBuffer.alignedElementSize() / sizeof(IndirectBatch))), sizeof(IndirectBatch)); +} + void VulkanCommandBuffer::drawIndexed(UInt32 indices, UInt32 instances, UInt32 firstIndex, Int32 vertexOffset, UInt32 firstInstance) const noexcept { ::vkCmdDrawIndexed(this->handle(), indices, instances, firstIndex, vertexOffset, firstInstance); } +void VulkanCommandBuffer::drawIndexedIndirect(const IVulkanBuffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept +{ + ::vkCmdDrawIndexedIndirect(this->handle(), batchBuffer.handle(), offset, batchCount, batchBuffer.elementSize()); +} + +void VulkanCommandBuffer::drawIndexedIndirect(const IVulkanBuffer& batchBuffer, const IVulkanBuffer& countBuffer, UInt64 offset, UInt64 countOffset, UInt32 maxBatches) const noexcept +{ + ::vkCmdDrawIndexedIndirectCount(this->handle(), batchBuffer.handle(), offset, countBuffer.handle(), countOffset, std::min(maxBatches, static_cast(batchBuffer.alignedElementSize() / sizeof(IndirectIndexedBatch))), sizeof(IndirectIndexedBatch)); +} + void VulkanCommandBuffer::pushConstants(const VulkanPushConstantsLayout& layout, const void* const memory) const noexcept { std::ranges::for_each(layout.ranges(), [this, &layout, &memory](const VulkanPushConstantsRange* range) { ::vkCmdPushConstants(this->handle(), layout.pipelineLayout().handle(), static_cast(Vk::getShaderStage(range->stage())), range->offset(), range->size(), memory); }); diff --git a/src/Backends/Vulkan/src/descriptor_set_layout.cpp b/src/Backends/Vulkan/src/descriptor_set_layout.cpp index 458623fe..65c39bb3 100644 --- a/src/Backends/Vulkan/src/descriptor_set_layout.cpp +++ b/src/Backends/Vulkan/src/descriptor_set_layout.cpp @@ 
-57,7 +57,7 @@ class VulkanDescriptorSetLayout::VulkanDescriptorSetLayoutImpl : public Implemen } public: - VkDescriptorSetLayout initialize(UInt32 maxUnboundedArraySize) + VkDescriptorSetLayout initialize() { LITEFX_TRACE(VULKAN_LOG, "Defining layout for descriptor set {0} {{ Stages: {1} }}...", m_space, m_stages); @@ -98,12 +98,24 @@ class VulkanDescriptorSetLayout::VulkanDescriptorSetLayoutImpl : public Implemen Array bindingFlags; Array bindingFlagCreateInfo; + // Track maximum number of descriptors in unbounded arrays. + auto maxUniformBuffers = m_device.adapter().limits().maxDescriptorSetUniformBuffers; + auto maxStorageBuffers = m_device.adapter().limits().maxDescriptorSetStorageBuffers; + auto maxStorageImages = m_device.adapter().limits().maxDescriptorSetStorageImages; + auto maxSampledImages = m_device.adapter().limits().maxDescriptorSetSampledImages; + auto maxSamplers = m_device.adapter().limits().maxDescriptorSetSamplers; + auto maxAttachments = m_device.adapter().limits().maxDescriptorSetInputAttachments; + std::ranges::for_each(m_descriptorLayouts, [&, i = 0](const UniquePtr& layout) mutable { auto bindingPoint = layout->binding(); auto type = layout->descriptorType(); LITEFX_TRACE(VULKAN_LOG, "\tWith descriptor {0}/{1} {{ Type: {2}, Element size: {3} bytes, Array size: {6}, Offset: {4}, Binding point: {5} }}...", ++i, m_descriptorLayouts.size(), type, layout->elementSize(), 0, bindingPoint, layout->descriptors()); + // Unbounded arrays are only allowed for the last descriptor in the descriptor set (https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkDescriptorBindingFlagBits.html#_description). 
+ if (m_usesDescriptorIndexing) [[unlikely]] + throw InvalidArgumentException("descriptorLayouts", "If an unbounded runtime array descriptor is used, it must be the last descriptor in the descriptor set."); + VkDescriptorSetLayoutBinding binding = {}; binding.binding = bindingPoint; binding.descriptorCount = layout->descriptors(); @@ -137,16 +149,62 @@ class VulkanDescriptorSetLayout::VulkanDescriptorSetLayoutImpl : public Implemen // If the descriptor is an unbounded runtime array, disable validation warnings about partially bound elements. if (binding.descriptorCount != std::numeric_limits::max()) - bindingFlags.push_back(VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT); + { + bindingFlags.push_back({ VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT }); + + // Track remaining descriptors towards limit. + switch (binding.descriptorType) + { + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + maxStorageBuffers -= binding.descriptorCount; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + maxUniformBuffers -= binding.descriptorCount; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + maxStorageImages -= binding.descriptorCount; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + maxSampledImages -= binding.descriptorCount; + break; + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + maxAttachments -= binding.descriptorCount; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + maxSamplers -= binding.descriptorCount; + break; + } + } else { - // Unbounded arrays must be the only descriptor within a descriptor set. 
- if (m_descriptorLayouts.size() != 1) [[unlikely]] - throw InvalidArgumentException("descriptorLayouts", "If an unbounded runtime array descriptor is used, it must be the only descriptor in the descriptor set, however the current descriptor set specifies {0} descriptors", m_descriptorLayouts.size()); - - bindingFlags.push_back(VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT | VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT | VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT); + bindingFlags.push_back({ VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT | VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT | VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT }); m_usesDescriptorIndexing = true; - binding.descriptorCount = maxUnboundedArraySize; + + switch (binding.descriptorType) + { + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + binding.descriptorCount = maxStorageBuffers; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + binding.descriptorCount = maxUniformBuffers; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + binding.descriptorCount = maxStorageImages; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + binding.descriptorCount = maxSampledImages; + break; + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + binding.descriptorCount = maxAttachments; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + binding.descriptorCount = maxSamplers; + break; + } } bindings.push_back(binding); @@ -254,10 +312,10 @@ class VulkanDescriptorSetLayout::VulkanDescriptorSetLayoutImpl : public Implemen // Shared interface. 
// ------------------------------------------------------------------------------------------------ -VulkanDescriptorSetLayout::VulkanDescriptorSetLayout(const VulkanDevice& device, Enumerable>&& descriptorLayouts, UInt32 space, ShaderStage stages, UInt32 maxUnboundedArraySize) : +VulkanDescriptorSetLayout::VulkanDescriptorSetLayout(const VulkanDevice& device, Enumerable>&& descriptorLayouts, UInt32 space, ShaderStage stages) : m_impl(makePimpl(this, device, std::move(descriptorLayouts), space, stages)), Resource(VK_NULL_HANDLE) { - this->handle() = m_impl->initialize(maxUnboundedArraySize); + this->handle() = m_impl->initialize(); } VulkanDescriptorSetLayout::VulkanDescriptorSetLayout(const VulkanDevice& device) noexcept : @@ -514,10 +572,9 @@ size_t VulkanDescriptorSetLayout::pools() const noexcept // Descriptor set layout builder shared interface. // ------------------------------------------------------------------------------------------------ -constexpr VulkanDescriptorSetLayoutBuilder::VulkanDescriptorSetLayoutBuilder(VulkanPipelineLayoutBuilder& parent, UInt32 space, ShaderStage stages, UInt32 maxUnboundedArraySize) : +constexpr VulkanDescriptorSetLayoutBuilder::VulkanDescriptorSetLayoutBuilder(VulkanPipelineLayoutBuilder& parent, UInt32 space, ShaderStage stages) : DescriptorSetLayoutBuilder(parent, UniquePtr(new VulkanDescriptorSetLayout(parent.device()))) { - m_state.maxUnboundedArraySize = maxUnboundedArraySize; } constexpr VulkanDescriptorSetLayoutBuilder::~VulkanDescriptorSetLayoutBuilder() noexcept = default; @@ -528,7 +585,7 @@ void VulkanDescriptorSetLayoutBuilder::build() instance->m_impl->m_descriptorLayouts = std::move(m_state.descriptorLayouts); instance->m_impl->m_space = std::move(m_state.space); instance->m_impl->m_stages = std::move(m_state.stages); - instance->m_impl->initialize(m_state.maxUnboundedArraySize); + instance->m_impl->initialize(); } constexpr UniquePtr VulkanDescriptorSetLayoutBuilder::makeDescriptor(DescriptorType type, 
UInt32 binding, UInt32 descriptorSize, UInt32 descriptors) diff --git a/src/Backends/Vulkan/src/device.cpp b/src/Backends/Vulkan/src/device.cpp index 0f53b9f7..0b624854 100644 --- a/src/Backends/Vulkan/src/device.cpp +++ b/src/Backends/Vulkan/src/device.cpp @@ -4,7 +4,9 @@ using namespace LiteFX::Rendering::Backends; -extern PFN_vkCmdDrawMeshTasksEXT vkCmdDrawMeshTasks { nullptr }; +extern PFN_vkCmdDrawMeshTasksEXT vkCmdDrawMeshTasks { nullptr }; +extern PFN_vkCmdDrawMeshTasksIndirectEXT vkCmdDrawMeshTasksIndirect { nullptr }; +extern PFN_vkCmdDrawMeshTasksIndirectCountEXT vkCmdDrawMeshTasksIndirectCount { nullptr }; extern PFN_vkGetAccelerationStructureBuildSizesKHR vkGetAccelerationStructureBuildSizes { nullptr }; extern PFN_vkCreateAccelerationStructureKHR vkCreateAccelerationStructure { nullptr }; extern PFN_vkDestroyAccelerationStructureKHR vkDestroyAccelerationStructure { nullptr }; @@ -314,6 +316,7 @@ class VulkanDevice::VulkanDeviceImpl : public Implement { .features = { .geometryShader = true, .tessellationShader = true, + .drawIndirectFirstInstance = features.DrawIndirect, .samplerAnisotropy = true } }; @@ -331,6 +334,7 @@ class VulkanDevice::VulkanDeviceImpl : public Implement { VkPhysicalDeviceVulkan12Features deviceFeatures12 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, .pNext = &deviceFeatures13, + .drawIndirectCount = features.DrawIndirect, .descriptorIndexing = true, .shaderInputAttachmentArrayDynamicIndexing = true, .shaderUniformTexelBufferArrayDynamicIndexing = true, @@ -358,10 +362,17 @@ class VulkanDevice::VulkanDeviceImpl : public Implement { .bufferDeviceAddress = true }; + // Enable shader draw parameters, if we use indirect draw. + VkPhysicalDeviceVulkan11Features deviceFeatures11 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, + .pNext = &deviceFeatures12, + .shaderDrawParameters = features.DrawIndirect + }; + // Enable extended dynamic state. 
VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extendedDynamicStateFeatures = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT, - .pNext = &deviceFeatures12, + .pNext = &deviceFeatures11, .extendedDynamicState = true }; @@ -384,8 +395,17 @@ class VulkanDevice::VulkanDeviceImpl : public Implement { debugMarkerSetObjectName = reinterpret_cast(::vkGetDeviceProcAddr(device, "vkDebugMarkerSetObjectNameEXT")); #endif - if (features.MeshShaders && vkCmdDrawMeshTasks == nullptr) - vkCmdDrawMeshTasks = reinterpret_cast(::vkGetDeviceProcAddr(device, "vkCmdDrawMeshTasksEXT")); + if (features.MeshShaders) + { + if (vkCmdDrawMeshTasks == nullptr) + vkCmdDrawMeshTasks = reinterpret_cast(::vkGetDeviceProcAddr(device, "vkCmdDrawMeshTasksEXT")); + + if (vkCmdDrawMeshTasksIndirect == nullptr) + vkCmdDrawMeshTasksIndirect = reinterpret_cast(::vkGetDeviceProcAddr(device, "vkCmdDrawMeshTasksIndirectEXT")); + + if (vkCmdDrawMeshTasksIndirectCount == nullptr) + vkCmdDrawMeshTasksIndirectCount = reinterpret_cast(::vkGetDeviceProcAddr(device, "vkCmdDrawMeshTasksIndirectCountEXT")); + } if (features.RayTracing) { diff --git a/src/Backends/Vulkan/src/factory.cpp b/src/Backends/Vulkan/src/factory.cpp index e9e81736..e440bc73 100644 --- a/src/Backends/Vulkan/src/factory.cpp +++ b/src/Backends/Vulkan/src/factory.cpp @@ -78,13 +78,13 @@ UniquePtr VulkanGraphicsFactory::createBuffer(const String& name, switch (type) { - case BufferType::Vertex: + case BufferType::Vertex: usageFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; break; - case BufferType::Index: + case BufferType::Index: usageFlags |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT; break; - case BufferType::Uniform: + case BufferType::Uniform: usageFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; alignment = m_impl->m_device.adapter().limits().minUniformBufferOffsetAlignment; break; @@ -108,6 +108,10 @@ UniquePtr VulkanGraphicsFactory::createBuffer(const String& name, usageFlags |= VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR; alignment = 
m_impl->m_device.adapter().limits().minStorageBufferOffsetAlignment; break; + case BufferType::Indirect: + usageFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; + alignment = m_impl->m_device.adapter().limits().minStorageBufferOffsetAlignment; + break; } if (alignment > 0) diff --git a/src/Backends/Vulkan/src/pipeline_layout.cpp b/src/Backends/Vulkan/src/pipeline_layout.cpp index c2b3bf14..9b7287b2 100644 --- a/src/Backends/Vulkan/src/pipeline_layout.cpp +++ b/src/Backends/Vulkan/src/pipeline_layout.cpp @@ -56,7 +56,7 @@ class VulkanPipelineLayout::VulkanPipelineLayoutImpl : public Implement{ new VulkanDescriptorSetLayout(m_device, { }, s, ShaderStage::Any, 0) }); // No descriptor can ever be allocated from an empty descriptor set. + m_descriptorSetLayouts.push_back(UniquePtr{ new VulkanDescriptorSetLayout(m_device, { }, s, ShaderStage::Any) }); // No descriptor can ever be allocated from an empty descriptor set. // Re-order them. std::ranges::sort(m_descriptorSetLayouts, [](const UniquePtr& a, const UniquePtr& b) { return a->space() < b->space(); }); @@ -175,9 +175,9 @@ void VulkanPipelineLayoutBuilder::build() instance->handle() = instance->m_impl->initialize(); } -constexpr VulkanDescriptorSetLayoutBuilder VulkanPipelineLayoutBuilder::descriptorSet(UInt32 space, ShaderStage stages, UInt32 maxUnboundedArraySize) +constexpr VulkanDescriptorSetLayoutBuilder VulkanPipelineLayoutBuilder::descriptorSet(UInt32 space, ShaderStage stages) { - return VulkanDescriptorSetLayoutBuilder(*this, space, stages, maxUnboundedArraySize); + return VulkanDescriptorSetLayoutBuilder(*this, space, stages); } constexpr VulkanPushConstantsLayoutBuilder VulkanPipelineLayoutBuilder::pushConstants(UInt32 size) diff --git a/src/Backends/Vulkan/src/shader_program.cpp b/src/Backends/Vulkan/src/shader_program.cpp index eaa6eea9..dfdb9cca 100644 --- a/src/Backends/Vulkan/src/shader_program.cpp +++ b/src/Backends/Vulkan/src/shader_program.cpp @@ -259,29 +259,17 @@ 
class VulkanShaderProgram::VulkanShaderProgramImpl : public Implement`. This is conceptually identical, so it ultimately makes no difference. - if (descriptor->type_description->op != SpvOp::SpvOpTypeRuntimeArray) - { - if ((descriptor->type_description->members[0].type_flags & (SPV_REFLECT_TYPE_FLAG_ARRAY | SPV_REFLECT_TYPE_FLAG_INT)) != 0) - type = descriptor->resource_type == SPV_REFLECT_RESOURCE_FLAG_SRV ? DescriptorType::ByteAddressBuffer : DescriptorType::RWByteAddressBuffer; - else - type = descriptor->resource_type == SPV_REFLECT_RESOURCE_FLAG_SRV ? DescriptorType::StructuredBuffer : DescriptorType::RWStructuredBuffer; - } + if ((descriptor->type_description->members[0].type_flags & SPV_REFLECT_TYPE_FLAG_STRUCT) == SPV_REFLECT_TYPE_FLAG_STRUCT) + type = (descriptor->resource_type & SPV_REFLECT_RESOURCE_FLAG_SRV) == SPV_REFLECT_RESOURCE_FLAG_SRV ? DescriptorType::StructuredBuffer : DescriptorType::RWStructuredBuffer; else - { - if ((descriptor->type_description->members[0].type_flags & (SPV_REFLECT_TYPE_FLAG_ARRAY | SPV_REFLECT_TYPE_FLAG_INT)) != 0) - type = descriptor->resource_type == SPV_REFLECT_RESOURCE_FLAG_SRV ? DescriptorType::ByteAddressBuffer : DescriptorType::RWByteAddressBuffer; - else // Assume SPV_REFLECT_RESOURCE_FLAG_SRV resource type, since UAV arrays are not allowed. - type = DescriptorType::StructuredBuffer; - } + type = (descriptor->resource_type & SPV_REFLECT_RESOURCE_FLAG_SRV) == SPV_REFLECT_RESOURCE_FLAG_SRV ? DescriptorType::ByteAddressBuffer : DescriptorType::RWByteAddressBuffer; break; } @@ -289,7 +277,7 @@ class VulkanShaderProgram::VulkanShaderProgramImpl : public Implement buffers[10]`) and declaring an array of descriptors - // (e.g. `StructuredBuffer buffers[10]`). The first variant only takes up a single descriptor, to which a buffer array can be bound. The second variant describes an + // (e.g. `StructuredBuffer buffers[]`). The first variant only takes up a single descriptor, to which a buffer array can be bound. 
The second variant describes an // variable-sized array of descriptors (aka runtime array). In the engine we treat both identically. A runtime array is defined as a descriptor with 0xFFFFFFFF elements. // Theoretically, we could bind a buffer array to an descriptor within a descriptor array, which is currently an unsupported use case. In the future, we might want to have // a separate descriptor flag for descriptor arrays and array descriptors and also provide methods to bind them both. diff --git a/src/Backends/Vulkan/src/swapchain.cpp b/src/Backends/Vulkan/src/swapchain.cpp index 233effcd..4fac9c76 100644 --- a/src/Backends/Vulkan/src/swapchain.cpp +++ b/src/Backends/Vulkan/src/swapchain.cpp @@ -97,7 +97,7 @@ class VulkanSwapChain::VulkanSwapChainImpl : public Implement { createInfo.presentMode = vsync ? VK_PRESENT_MODE_FIFO_KHR : VK_PRESENT_MODE_IMMEDIATE_KHR;/* VK_PRESENT_MODE_MAILBOX_KHR;*/ m_vsync = vsync; - LITEFX_TRACE(VULKAN_LOG, "Creating swap chain for device {0} {{ Images: {1}, Extent: {2}x{3} Px, Format: {4}, VSync: {5} }}...", reinterpret_cast(&m_device), images, createInfo.imageExtent.width, createInfo.imageExtent.height, selectedFormat, vsync); + LITEFX_TRACE(VULKAN_LOG, "Creating swap chain for device {0} {{ Images: {1}, Extent: {2}x{3} Px, Format: {4}, VSync: {5} }}...", reinterpret_cast(&m_device), images, createInfo.imageExtent.width, createInfo.imageExtent.height, selectedFormat, vsync); // Log if something needed to be changed. 
[[unlikely]] if (selectedFormat != format) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d444704e..dd4fe51e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -56,6 +56,7 @@ IF(LITEFX_BUILD_EXAMPLES) ADD_SUBDIRECTORY(Samples/Multithreading) ADD_SUBDIRECTORY(Samples/Bindless) ADD_SUBDIRECTORY(Samples/Compute) + ADD_SUBDIRECTORY(Samples/Indirect) ADD_SUBDIRECTORY(Samples/MeshShader) ADD_SUBDIRECTORY(Samples/RayTracing) ADD_SUBDIRECTORY(Samples/RayQueries) diff --git a/src/Modules/overlay-ports/dx-agility-sdk/portfile.cmake b/src/Modules/overlay-ports/dx-agility-sdk/portfile.cmake index b53cd9d7..d88dd4af 100644 --- a/src/Modules/overlay-ports/dx-agility-sdk/portfile.cmake +++ b/src/Modules/overlay-ports/dx-agility-sdk/portfile.cmake @@ -4,7 +4,7 @@ vcpkg_check_linkage(ONLY_DYNAMIC_LIBRARY) vcpkg_download_distfile(ARCHIVE URLS "https://www.nuget.org/api/v2/package/Microsoft.Direct3D.D3D12/${VERSION}" FILENAME "dxagilitysdk.${VERSION}.zip" - SHA512 22dba68f5911588a6261fdc0192e8dca57edd25bdd3bc72b610ad468a0a6dbae51db1a9cdeadf4b57f6017136af78b07ef1357b1d93bedf4e6922076bd57efb5 + SHA512 aab78de3a9db35b1b11b2c2498d2dd19e66a71cdcd1cb426f0469d551fcd6917f4d80734be8b6d0c0b20f7f6ae4b5b9936b0b0aedb229ea49265932b36aee11e ) vcpkg_extract_source_archive( diff --git a/src/Modules/overlay-ports/dx-agility-sdk/vcpkg.json b/src/Modules/overlay-ports/dx-agility-sdk/vcpkg.json index c47a7886..2008b7c2 100644 --- a/src/Modules/overlay-ports/dx-agility-sdk/vcpkg.json +++ b/src/Modules/overlay-ports/dx-agility-sdk/vcpkg.json @@ -1,6 +1,6 @@ { "name": "directx-agility-sdk", - "version": "1.611.2", + "version": "1.614.0", "description": "The DirectX 12 Agility SDK allows developers to adopt the newest DirectX 12 graphics features faster than ever before.", "homepage": "https://devblogs.microsoft.com/directx/directx12agility/", "documentation": "https://devblogs.microsoft.com/directx/gettingstarted-dx12agility/", diff --git a/src/Rendering/include/litefx/rendering.hpp 
b/src/Rendering/include/litefx/rendering.hpp index ddf53a7a..93bf84c2 100644 --- a/src/Rendering/include/litefx/rendering.hpp +++ b/src/Rendering/include/litefx/rendering.hpp @@ -481,9 +481,13 @@ namespace LiteFX::Rendering { public: using ICommandBuffer::queue; using ICommandBuffer::dispatch; + using ICommandBuffer::dispatchIndirect; + using ICommandBuffer::dispatchMesh; using ICommandBuffer::draw; + using ICommandBuffer::drawIndirect; using ICommandBuffer::drawIndexed; + using ICommandBuffer::drawIndexedIndirect; using ICommandBuffer::barrier; using ICommandBuffer::transfer; using ICommandBuffer::generateMipMaps; @@ -582,6 +586,27 @@ namespace LiteFX::Rendering { /// virtual void pushConstants(const push_constants_layout_type& layout, const void* const memory) const noexcept = 0; + /// + virtual void dispatchIndirect(const buffer_type& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept = 0; + + /// + virtual void dispatchMeshIndirect(const buffer_type& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept = 0; + + /// + virtual void dispatchMeshIndirect(const buffer_type& batchBuffer, const buffer_type& countBuffer, UInt64 offset = 0, UInt64 countOffset = 0, UInt32 maxBatches = std::numeric_limits::max()) const noexcept = 0; + + /// + virtual void drawIndirect(const buffer_type& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept = 0; + + /// + virtual void drawIndirect(const buffer_type& batchBuffer, const buffer_type& countBuffer, UInt64 offset = 0, UInt64 countOffset = 0, UInt32 maxBatches = std::numeric_limits::max()) const noexcept = 0; + + /// + virtual void drawIndexedIndirect(const buffer_type& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept = 0; + + /// + virtual void drawIndexedIndirect(const buffer_type& batchBuffer, const buffer_type& countBuffer, UInt64 offset = 0, UInt64 countOffset = 0, UInt32 maxBatches = std::numeric_limits::max()) const noexcept = 0; + /// inline virtual void draw(const 
vertex_buffer_type& vertexBuffer, UInt32 instances = 1, UInt32 firstVertex = 0, UInt32 firstInstance = 0) const { this->bind(vertexBuffer); @@ -727,10 +752,30 @@ namespace LiteFX::Rendering { inline void cmdPushConstants(const IPushConstantsLayout& layout, const void* const memory) const noexcept override { this->pushConstants(dynamic_cast(layout), memory); } + + inline void cmdDispatchIndirect(const IBuffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept override { + this->dispatchIndirect(dynamic_cast(batchBuffer), batchCount, offset); + } + + inline void cmdDispatchMeshIndirect(const IBuffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept override { + this->dispatchMeshIndirect(dynamic_cast(batchBuffer), batchCount, offset); + } + + inline void cmdDispatchMeshIndirect(const IBuffer& batchBuffer, const IBuffer& countBuffer, UInt64 offset, UInt64 countOffset, UInt32 maxBatches) const noexcept override { + this->dispatchMeshIndirect(dynamic_cast(batchBuffer), dynamic_cast(countBuffer), offset, countOffset, maxBatches); + } inline void cmdDraw(const IVertexBuffer& vertexBuffer, UInt32 instances, UInt32 firstVertex, UInt32 firstInstance) const override { this->draw(dynamic_cast(vertexBuffer), instances, firstVertex, firstInstance); } + + inline void cmdDrawIndirect(const IBuffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept override { + this->drawIndirect(dynamic_cast(batchBuffer), batchCount, offset); + } + + inline void cmdDrawIndirect(const IBuffer& batchBuffer, const IBuffer& countBuffer, UInt64 offset, UInt64 countOffset, UInt32 maxBatches) const noexcept override { + this->drawIndirect(dynamic_cast(batchBuffer), dynamic_cast(countBuffer), offset, countOffset, maxBatches); + } inline void cmdDrawIndexed(const IIndexBuffer& indexBuffer, UInt32 instances, UInt32 firstIndex, Int32 vertexOffset, UInt32 firstInstance) const override { this->drawIndexed(dynamic_cast(indexBuffer), instances, firstIndex, vertexOffset, 
firstInstance); @@ -740,6 +785,14 @@ namespace LiteFX::Rendering { this->drawIndexed(dynamic_cast(vertexBuffer), dynamic_cast(indexBuffer), instances, firstIndex, vertexOffset, firstInstance); } + inline void cmdDrawIndexedIndirect(const IBuffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept override { + this->drawIndexedIndirect(dynamic_cast(batchBuffer), batchCount, offset); + } + + inline void cmdDrawIndexedIndirect(const IBuffer& batchBuffer, const IBuffer& countBuffer, UInt64 offset, UInt64 countOffset, UInt32 maxBatches) const noexcept override { + this->drawIndexedIndirect(dynamic_cast(batchBuffer), dynamic_cast(countBuffer), offset, countOffset, maxBatches); + } + inline void cmdExecute(SharedPtr commandBuffer) const override { this->execute(std::dynamic_pointer_cast(commandBuffer)); } diff --git a/src/Rendering/include/litefx/rendering_api.hpp b/src/Rendering/include/litefx/rendering_api.hpp index f5763989..f17c6ef4 100644 --- a/src/Rendering/include/litefx/rendering_api.hpp +++ b/src/Rendering/include/litefx/rendering_api.hpp @@ -456,6 +456,9 @@ namespace LiteFX::Rendering { /// /// /// In GLSL, use the buffer keyword to access storage buffers. In HLSL, use the RWStructuredBuffer keyword. + /// + /// When using shader reflection on `AppendStructuredBuffer`/`ConsumeStructuredBuffer`, this descriptor type will be deduced, but support for those constructs is not + /// implemented. Instead, use a separate descriptor and call `InterlockedAdd` on it. /// RWStructuredBuffer = 0x00000012, @@ -579,6 +582,25 @@ namespace LiteFX::Rendering { /// ShaderBindingTable = 0x00000007, + /// + /// Describes a buffer that stores data to generate indirect draw calls. + /// + /// + /// An indirect buffer refers to a buffer that contains a set of information used to generate dispatch or draw calls. A single dispatch or draw call in this + /// context is referred to as a *batch*. 
An indirect buffer must only contain one type of batch, e.g., it is not allowed to mix indexed and non-indexed batches + /// in a single indirect buffer. Batches need to be densely packed within the buffer, i.e., the stride between elements is always assumed to be `0`. + /// + /// Indirect buffers can be written from shaders, which enables use cases like GPU-culling, where a compute shader writes the batches in an indirect buffer, that + /// is then passed to an indirect draw call. In such situations, the number of batches in the buffer is typically not known beforehand, so an additional buffer + /// is used to store the number of draw calls. + /// + /// Note that indirect drawing support is currently limited in how data can be passed to draw calls. This is due to Vulkan not providing an adequate interface for + /// describing per-draw bindings in the indirect signature. In DirectX 12, it is possible for batches to provide different vertex and index buffers, as well as + /// resource bindings for each draw call. Vulkan only supports draw calls that target already bound descriptors. Due to this limitation, it is currently best + /// practice to use bind-less descriptor arrays to pass per-draw data to draws and use a vertex attribute to index into the descriptor array. + /// + Indirect = 0x00000008, + /// /// Describes another type of buffer, such as samplers or images. /// @@ -1894,6 +1916,98 @@ namespace LiteFX::Rendering { #pragma endregion +#pragma region "Data Types" + + /// + /// An indirect batch used to execute a standard draw call. + /// + /// + /// + struct LITEFX_RENDERING_API alignas(16) IndirectBatch { + /// + /// The number of vertices of the mesh. + /// + /// + UInt32 VertexCount{ }; + + /// + /// The number of instances to draw of this mesh. + /// + /// + UInt32 InstanceCount{ }; + + /// + /// The index of the first vertex of the mesh in the vertex buffer. 
+ /// + /// + UInt32 FirstVertex{ }; + + /// + /// The index of the first instance to draw. This value is added to each instance index before obtaining per-instance data from the vertex buffer. + /// + /// + UInt32 FirstInstance{ }; + }; + + /// + /// An indirect batch used to execute an indexed draw call. + /// + /// + /// + struct LITEFX_RENDERING_API alignas(16) IndirectIndexedBatch { + /// + /// The number of indices in the mesh index buffer. + /// + /// + UInt32 IndexCount{ }; + + /// + /// The number of instances to draw of this mesh. + /// + /// + UInt32 InstanceCount{ }; + + /// + /// The first index in the index buffer used to draw the mesh. + /// + /// + UInt32 FirstIndex{ }; + + /// + /// An offset added to each index to obtain a vertex. + /// + Int32 VertexOffset{ }; + + /// + /// The index of the first instance to draw. This value is added to each instance index before obtaining per-instance data from the vertex buffer. + /// + /// + UInt32 FirstInstance{ }; + }; + + /// + /// An indirect batch used to dispatch a compute shader kernel. + /// + /// + /// + struct LITEFX_RENDERING_API alignas(16) IndirectDispatchBatch { + /// + /// The number of thread groups in x-direction. + /// + UInt32 X{ 1 }; + + /// + /// The number of thread groups in y-direction. + /// + UInt32 Y{ 1 }; + + /// + /// The number of thread groups in z-direction. + /// + UInt32 Z{ 1 }; + }; +#pragma endregion + /// /// The interface for a state resource. /// @@ -4601,10 +4715,10 @@ namespace LiteFX::Rendering { public: /// - /// Updates a constant buffer within the current descriptor set. + /// Updates one or more buffer descriptors within the current descriptor set. /// /// The buffer binding point. - /// The constant buffer to write to the descriptor set. + /// The buffer to write to the descriptor set. /// The index of the first element in the buffer to bind to the descriptor set. /// The number of elements from the buffer to bind to the descriptor set.
A value of `0` binds all available elements, starting at . /// The index of the first descriptor in the descriptor array to update. @@ -4613,7 +4727,7 @@ namespace LiteFX::Rendering { } /// - /// Updates a texture within the current descriptor set. + /// Updates one or more texture descriptors within the current descriptor set. /// /// /// The exact representation of the level and layer parameters depends on the dimension of the provided texture, as well as the type of the descriptor identified by the @@ -4638,7 +4752,7 @@ namespace LiteFX::Rendering { } /// - /// Updates a sampler within the current descriptor set. + /// Updates one or more sampler descriptors within the current descriptor set. /// /// The sampler binding point. /// The sampler to write to the descriptor set. @@ -6313,17 +6427,29 @@ namespace LiteFX::Rendering { /// /// Executes a compute shader. /// - /// The number of threads per dimension. - virtual void dispatch(const Vector3u& threadCount) const noexcept = 0; + /// The number of thread groups per dimension. + /// + virtual void dispatch(const Vector3u& threadGroupCount) const noexcept = 0; /// /// Executes a compute shader. /// - /// The number of threads along the x dimension. - /// The number of threads along the y dimension. - /// The number of threads along the z dimension. + /// The number of thread groups along the x dimension. + /// The number of thread groups along the y dimension. + /// The number of thread groups along the z dimension. inline void dispatch(UInt32 x, UInt32 y, UInt32 z) const noexcept { - this->dispatch({ x,y, z }); + this->dispatch({ x, y, z }); + } + + /// + /// Executes a set of indirect dispatches. + /// + /// The buffer that contains the batches. + /// The number of batches in the buffer to execute. + /// The offset (in bytes) to the first batch in the . 
+ /// + inline void dispatchIndirect(const IBuffer& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept { + this->cmdDispatchIndirect(batchBuffer, batchCount, offset); } /// @@ -6332,8 +6458,8 @@ namespace LiteFX::Rendering { /// /// This method is only supported if the feature is enabled. /// - /// The number of threads per dimension. - virtual void dispatchMesh(const Vector3u& threadCount) const noexcept = 0; + /// The number of thread groups per dimension. + virtual void dispatchMesh(const Vector3u& threadGroupCount) const noexcept = 0; /// /// Executes a mesh shader pipeline. @@ -6341,13 +6467,37 @@ namespace LiteFX::Rendering { /// /// This method is only supported if the feature is enabled. /// - /// The number of threads along the x dimension. - /// The number of threads along the y dimension. - /// The number of threads along the z dimension. + /// The number of thread groups along the x dimension. + /// The number of thread groups along the y dimension. + /// The number of thread groups along the z dimension. inline void dispatchMesh(UInt32 x, UInt32 y, UInt32 z) const noexcept { this->dispatchMesh({ x, y, z }); } + /// + /// Executes a set of indirect mesh shader dispatches. + /// + /// The buffer that contains the batches. + /// The number of batches in the buffer to execute. + /// The offset (in bytes) to the first batch in the . + /// + inline void dispatchMeshIndirect(const IBuffer& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept { + this->cmdDispatchMeshIndirect(batchBuffer, batchCount, offset); + } + + /// + /// Executes a set of indirect mesh shader dispatches. + /// + /// The buffer that contains the batches. + /// The buffer that contains the number of batches to execute. + /// The offset (in bytes) to the first batch in the . + /// The offset (in bytes) to the number of batches in the . + /// The maximum number of batches executed, even if there are more batches in . 
+ /// + inline void dispatchMeshIndirect(const IBuffer& batchBuffer, const IBuffer& countBuffer, UInt64 offset = 0, UInt64 countOffset = 0, UInt32 maxBatches = std::numeric_limits::max()) const noexcept { + this->cmdDispatchMeshIndirect(batchBuffer, countBuffer, offset, countOffset, maxBatches); + } + /// /// Executes a query on a ray-tracing pipeline. /// @@ -6389,27 +6539,9 @@ namespace LiteFX::Rendering { /// The number of instances to draw. /// The index of the first vertex to start drawing from. /// The index of the first instance to draw. + /// virtual void draw(UInt32 vertices, UInt32 instances = 1, UInt32 firstVertex = 0, UInt32 firstInstance = 0) const noexcept = 0; - /// - /// Draws the currently bound vertex buffer with a set of indices from the currently bound index buffer. - /// - /// The number of indices to draw. - /// The number of instances to draw. - /// The index of the first element of the index buffer to start drawing from. - /// The offset added to each index to find the corresponding vertex. - /// The index of the first instance to draw. - virtual void drawIndexed(UInt32 indices, UInt32 instances = 1, UInt32 firstIndex = 0, Int32 vertexOffset = 0, UInt32 firstInstance = 0) const noexcept = 0; - - /// - /// Pushes a block of memory into the push constants backing memory. - /// - /// The layout of the push constants to update. - /// A pointer to the source memory. - inline void pushConstants(const IPushConstantsLayout& layout, const void* const memory) const noexcept { - this->cmdPushConstants(layout, memory); - } - /// /// Draws all vertices from the vertex buffer provided in . /// @@ -6424,6 +6556,41 @@ namespace LiteFX::Rendering { this->cmdDraw(vertexBuffer, instances, firstVertex, firstInstance); } + /// + /// Executes a set of indirect non-indexed draw calls. + /// + /// The buffer that contains the batches. + /// The number of batches in the buffer to execute. + /// The offset (in bytes) to the first batch in the . 
+ /// + inline void drawIndirect(const IBuffer& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept { + this->cmdDrawIndirect(batchBuffer, batchCount, offset); + } + + /// + /// Executes a set of indirect non-indexed draw calls. + /// + /// The buffer that contains the batches. + /// The buffer that contains the number of batches to execute. + /// The offset (in bytes) to the first batch in the . + /// The offset (in bytes) to the number of batches in the . + /// The maximum number of batches executed, even if there are more batches in . + /// + inline void drawIndirect(const IBuffer& batchBuffer, const IBuffer& countBuffer, UInt64 offset = 0, UInt64 countOffset = 0, UInt32 maxBatches = std::numeric_limits::max()) const noexcept { + this->cmdDrawIndirect(batchBuffer, countBuffer, offset, countOffset, maxBatches); + } + + /// + /// Draws the currently bound vertex buffer with a set of indices from the currently bound index buffer. + /// + /// The number of indices to draw. + /// The number of instances to draw. + /// The index of the first element of the index buffer to start drawing from. + /// The offset added to each index to find the corresponding vertex. + /// The index of the first instance to draw. + /// + virtual void drawIndexed(UInt32 indices, UInt32 instances = 1, UInt32 firstIndex = 0, Int32 vertexOffset = 0, UInt32 firstInstance = 0) const noexcept = 0; + /// /// Draws the currently bound vertex buffer using the index buffer provided in . /// @@ -6455,6 +6622,39 @@ namespace LiteFX::Rendering { this->cmdDrawIndexed(vertexBuffer, indexBuffer, instances, firstIndex, vertexOffset, firstInstance); } + /// + /// Executes a set of indirect indexed draw calls. + /// + /// The buffer that contains the batches. + /// The number of batches in the buffer to execute. + /// The offset (in bytes) to the first batch in the . 
+ /// + inline void drawIndexedIndirect(const IBuffer& batchBuffer, UInt32 batchCount, UInt64 offset = 0) const noexcept { + this->cmdDrawIndexedIndirect(batchBuffer, batchCount, offset); + } + + /// + /// Executes a set of indirect indexed draw calls. + /// + /// The buffer that contains the batches. + /// The buffer that contains the number of batches to execute. + /// The offset (in bytes) to the first batch in the . + /// The offset (in bytes) to the number of batches in the . + /// The maximum number of batches executed, even if there are more batches in . + /// + inline void drawIndexedIndirect(const IBuffer& batchBuffer, const IBuffer& countBuffer, UInt64 offset = 0, UInt64 countOffset = 0, UInt32 maxBatches = std::numeric_limits::max()) const noexcept { + this->cmdDrawIndexedIndirect(batchBuffer, countBuffer, offset, countOffset, maxBatches); + } + + /// + /// Pushes a block of memory into the push constants backing memory. + /// + /// The layout of the push constants to update. + /// A pointer to the source memory. + inline void pushConstants(const IPushConstantsLayout& layout, const void* const memory) const noexcept { + this->cmdPushConstants(layout, memory); + } + /// /// Sets the viewports used for the subsequent draw calls. 
/// @@ -6651,9 +6851,16 @@ namespace LiteFX::Rendering { virtual void cmdBind(const IVertexBuffer& buffer) const noexcept = 0; virtual void cmdBind(const IIndexBuffer& buffer) const noexcept = 0; virtual void cmdPushConstants(const IPushConstantsLayout& layout, const void* const memory) const noexcept = 0; + virtual void cmdDispatchIndirect(const IBuffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept = 0; + virtual void cmdDispatchMeshIndirect(const IBuffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept = 0; + virtual void cmdDispatchMeshIndirect(const IBuffer& batchBuffer, const IBuffer& countBuffer, UInt64 offset, UInt64 countOffset, UInt32 maxBatches) const noexcept = 0; virtual void cmdDraw(const IVertexBuffer& vertexBuffer, UInt32 instances, UInt32 firstVertex, UInt32 firstInstance) const = 0; + virtual void cmdDrawIndirect(const IBuffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept = 0; + virtual void cmdDrawIndirect(const IBuffer& batchBuffer, const IBuffer& countBuffer, UInt64 offset, UInt64 countOffset, UInt32 maxBatches) const noexcept = 0; virtual void cmdDrawIndexed(const IIndexBuffer& indexBuffer, UInt32 instances, UInt32 firstIndex, Int32 vertexOffset, UInt32 firstInstance) const = 0; virtual void cmdDrawIndexed(const IVertexBuffer& vertexBuffer, const IIndexBuffer& indexBuffer, UInt32 instances, UInt32 firstIndex, Int32 vertexOffset, UInt32 firstInstance) const = 0; + virtual void cmdDrawIndexedIndirect(const IBuffer& batchBuffer, UInt32 batchCount, UInt64 offset) const noexcept = 0; + virtual void cmdDrawIndexedIndirect(const IBuffer& batchBuffer, const IBuffer& countBuffer, UInt64 offset, UInt64 countOffset, UInt32 maxBatches) const noexcept = 0; virtual void cmdExecute(SharedPtr commandBuffer) const = 0; virtual void cmdExecute(Enumerable> commandBuffer) const = 0; virtual void cmdBuildAccelerationStructure(IBottomLevelAccelerationStructure& blas, const SharedPtr scratchBuffer, const IBuffer& 
buffer, UInt64 offset) const = 0; @@ -8240,6 +8447,11 @@ namespace LiteFX::Rendering { /// Enables or disables ray query and inline ray-tracing support. /// bool RayQueries { false }; + + /// + /// Enables or disables support for indirect draw. + /// + bool DrawIndirect { false }; }; /// diff --git a/src/Rendering/include/litefx/rendering_builders.hpp b/src/Rendering/include/litefx/rendering_builders.hpp index c346dfd9..02d752ce 100644 --- a/src/Rendering/include/litefx/rendering_builders.hpp +++ b/src/Rendering/include/litefx/rendering_builders.hpp @@ -992,14 +992,6 @@ namespace LiteFX::Rendering { /// UInt32 space; - /// - /// The maximum size of unbounded (i.e., bindless) descriptor arrays. - /// - /// - /// This setting is only required in Vulkan. For DirectX 12 it is ignored. - /// - UInt32 maxUnboundedArraySize; - /// /// The shader stages, the descriptor set is accessible from. /// diff --git a/src/Samples/Compute/CMakeLists.txt b/src/Samples/Compute/CMakeLists.txt index 5780379d..5eaf9f66 100644 --- a/src/Samples/Compute/CMakeLists.txt +++ b/src/Samples/Compute/CMakeLists.txt @@ -24,24 +24,24 @@ ENDIF(NOT RENDERDOC_INCLUDE_DIR AND LITEFX_BUILD_EXAMPLES_RENDERDOC_LOADER) CONFIGURE_FILE("../config.tmpl" "${CMAKE_CURRENT_BINARY_DIR}/src/config.h") # Collect header & source files. -SET(SAMPLE_BASIC_RENDERING_HEADERS +SET(SAMPLE_COMPUTE_HEADERS "src/sample.h" ) -SET(SAMPLE_BASIC_RENDERING_SOURCES +SET(SAMPLE_COMPUTE_SOURCES "src/main.cpp" "src/sample.cpp" ) # Add shared library project. ADD_EXECUTABLE(${PROJECT_NAME} - ${SAMPLE_BASIC_RENDERING_HEADERS} - ${SAMPLE_BASIC_RENDERING_SOURCES} + ${SAMPLE_COMPUTE_HEADERS} + ${SAMPLE_COMPUTE_SOURCES} "${CMAKE_CURRENT_BINARY_DIR}/src/config.h" ) # Create source groups for better code organization. 
-SOURCE_GROUP(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SAMPLE_BASIC_RENDERING_HEADERS} ${SAMPLE_BASIC_RENDERING_SOURCES}) +SOURCE_GROUP(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SAMPLE_COMPUTE_HEADERS} ${SAMPLE_COMPUTE_SOURCES}) # Setup project properties. SET_TARGET_PROPERTIES(${PROJECT_NAME} PROPERTIES @@ -115,4 +115,4 @@ INSTALL(TARGETS ${PROJECT_NAME} EXPORT LiteFXSamples # Export config. INSTALL(EXPORT LiteFXSamples DESTINATION ${CMAKE_INSTALL_EXPORT_DIR}) -EXPORT(TARGETS ${PROJECT_NAME} FILE LiteFXSamplesBasicRenderingConfig.cmake) \ No newline at end of file +EXPORT(TARGETS ${PROJECT_NAME} FILE LiteFXSamplesComputeConfig.cmake) \ No newline at end of file diff --git a/src/Samples/Compute/src/sample.cpp b/src/Samples/Compute/src/sample.cpp index 1c658bb3..6928ab81 100644 --- a/src/Samples/Compute/src/sample.cpp +++ b/src/Samples/Compute/src/sample.cpp @@ -465,7 +465,7 @@ void SampleApp::drawFrame() commandBuffer->bind(postBindings); // Dispatch the post-processing pass. - commandBuffer->dispatch({ static_cast(image.extent().x()), static_cast(image.extent().y()), 1 }); + commandBuffer->dispatch({ static_cast(image.extent().x()) / 8, static_cast(image.extent().y()) / 8, 1 }); // After post-processing, transition the image back into a state where it can be copied from. barrier = m_device->makeBarrier(PipelineStage::Compute, PipelineStage::None); diff --git a/src/Samples/Indirect/CMakeLists.txt b/src/Samples/Indirect/CMakeLists.txt new file mode 100644 index 00000000..a8ed6c36 --- /dev/null +++ b/src/Samples/Indirect/CMakeLists.txt @@ -0,0 +1,121 @@ +################################################################################################### +##### ##### +##### LiteFX.Samples.Indirect - Contains the indirect rendering sample. 
##### +##### ##### +################################################################################################### + +PROJECT(LiteFX.Samples.Indirect VERSION ${LITEFX_VERSION} LANGUAGES CXX) +MESSAGE(STATUS "Initializing: ${PROJECT_NAME}...") + +# Indirect draw requires shader model 6.8 or later. +STRING(REPLACE "_" ";" SHADER_MODEL ${LITEFX_BUILD_HLSL_SHADER_MODEL}) +LIST(GET SHADER_MODEL 0 SHADER_MODEL_MAJOR) +LIST(GET SHADER_MODEL 1 SHADER_MODEL_MINOR) +SET(SAMPLE_SHADER_MODEL ${LITEFX_BUILD_HLSL_SHADER_MODEL}) + +IF(${SHADER_MODEL_MAJOR} LESS 6 OR (${SHADER_MODEL_MAJOR} EQUAL 6 AND ${SHADER_MODEL_MINOR} LESS 8)) + MESSAGE(WARNING "The indirect draw sample requires shader model 6.8 or later, but ${SHADER_MODEL_MAJOR}.${SHADER_MODEL_MINOR} is selected. The sample will be built with shader model 6.8 and will not work if your system does not support it.") + SET(SAMPLE_SHADER_MODEL "6_8") +ENDIF(${SHADER_MODEL_MAJOR} LESS 6 OR (${SHADER_MODEL_MAJOR} EQUAL 6 AND ${SHADER_MODEL_MINOR} LESS 8)) + +IF(NOT LITEFX_BUILD_WITH_GLM) + MESSAGE(FATAL_ERROR "This sample requires the glm converters for the math module. Set the LITEFX_BUILD_WITH_GLM option to ON and retry.") +ENDIF(NOT LITEFX_BUILD_WITH_GLM) + +# Resolve package dependencies. +FIND_PACKAGE(glfw3 CONFIG REQUIRED) +FIND_PACKAGE(cli11 CONFIG REQUIRED) +FIND_PATH(RENDERDOC_INCLUDE_DIR "renderdoc_app.h") + +IF(NOT RENDERDOC_INCLUDE_DIR AND LITEFX_BUILD_EXAMPLES_RENDERDOC_LOADER) + MESSAGE(WARNING "RenderDoc is not installed on the system. Loader will not be created.") + SET(LITEFX_BUILD_EXAMPLES_RENDERDOC_LOADER OFF CACHE BOOL "" FORCE) +ENDIF(NOT RENDERDOC_INCLUDE_DIR AND LITEFX_BUILD_EXAMPLES_RENDERDOC_LOADER) + +CONFIGURE_FILE("../config.tmpl" "${CMAKE_CURRENT_BINARY_DIR}/src/config.h") + +# Collect header & source files. +SET(SAMPLE_INDIRECT_HEADERS + "src/sample.h" +) + +SET(SAMPLE_INDIRECT_SOURCES + "src/main.cpp" + "src/sample.cpp" +) + +# Add shared library project. 
+ADD_EXECUTABLE(${PROJECT_NAME} + ${SAMPLE_INDIRECT_HEADERS} + ${SAMPLE_INDIRECT_SOURCES} + "${CMAKE_CURRENT_BINARY_DIR}/src/config.h" +) + +# Create source groups for better code organization. +SOURCE_GROUP(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SAMPLE_INDIRECT_HEADERS} ${SAMPLE_INDIRECT_SOURCES}) + +# Setup project properties. +SET_TARGET_PROPERTIES(${PROJECT_NAME} PROPERTIES + FOLDER "Samples" + VERSION ${LITEFX_VERSION} + SOVERSION ${LITEFX_YEAR} +) + +# Setup target include directories. +TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/src/") + +IF(LITEFX_BUILD_EXAMPLES_RENDERDOC_LOADER) + TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PRIVATE ${RENDERDOC_INCLUDE_DIR}) +ENDIF(LITEFX_BUILD_EXAMPLES_RENDERDOC_LOADER) + +# Link project dependencies. +TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE LiteFX.Core LiteFX.Math LiteFX.AppModel LiteFX.Rendering glfw CLI11::CLI11) + +IF(LITEFX_BUILD_VULKAN_BACKEND) + TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE LiteFX.Backends.Vulkan) + + ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.VS SOURCE "shaders/indirect_vs.hlsl" LANGUAGE HLSL TYPE VERTEX COMPILE_AS SPIRV SHADER_MODEL ${SAMPLE_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.FS SOURCE "shaders/indirect_fs.hlsl" LANGUAGE HLSL TYPE FRAGMENT COMPILE_AS SPIRV SHADER_MODEL ${SAMPLE_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Vk.Shaders.CS SOURCE "shaders/indirect_cull_cs.hlsl" LANGUAGE HLSL TYPE COMPUTE COMPILE_AS SPIRV SHADER_MODEL ${SAMPLE_SHADER_MODEL} COMPILER DXC) + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.VS PROPERTIES FOLDER "Samples/Shaders/Vulkan") + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.FS PROPERTIES FOLDER "Samples/Shaders/Vulkan") + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Vk.Shaders.CS PROPERTIES FOLDER "Samples/Shaders/Vulkan") + + TARGET_LINK_SHADERS(${PROJECT_NAME} + INSTALL_DESTINATION "${CMAKE_INSTALL_BINARY_DIR}/${SHADER_DEFAULT_SUBDIR}" + SHADERS 
${PROJECT_NAME}.Vk.Shaders.VS ${PROJECT_NAME}.Vk.Shaders.FS ${PROJECT_NAME}.Vk.Shaders.CS + ) +ENDIF(LITEFX_BUILD_VULKAN_BACKEND) + +IF(LITEFX_BUILD_DIRECTX_12_BACKEND) + TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE LiteFX.Backends.DirectX12) + + ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.VS SOURCE "shaders/indirect_vs.hlsl" LANGUAGE HLSL TYPE VERTEX COMPILE_AS DXIL SHADER_MODEL ${SAMPLE_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.PS SOURCE "shaders/indirect_fs.hlsl" LANGUAGE HLSL TYPE PIXEL COMPILE_AS DXIL SHADER_MODEL ${SAMPLE_SHADER_MODEL} COMPILER DXC) + ADD_SHADER_MODULE(${PROJECT_NAME}.Dx.Shaders.CS SOURCE "shaders/indirect_cull_cs.hlsl" LANGUAGE HLSL TYPE COMPUTE COMPILE_AS DXIL SHADER_MODEL ${SAMPLE_SHADER_MODEL} COMPILER DXC) + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.VS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.PS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") + SET_TARGET_PROPERTIES(${PROJECT_NAME}.Dx.Shaders.CS PROPERTIES FOLDER "Samples/Shaders/DirectX 12") + + TARGET_LINK_SHADERS(${PROJECT_NAME} + INSTALL_DESTINATION "${CMAKE_INSTALL_BINARY_DIR}/${SHADER_DEFAULT_SUBDIR}" + SHADERS ${PROJECT_NAME}.Dx.Shaders.VS ${PROJECT_NAME}.Dx.Shaders.PS ${PROJECT_NAME}.Dx.Shaders.CS + ) +ENDIF(LITEFX_BUILD_DIRECTX_12_BACKEND) + +# Re-use pre-compiled core header. +IF(LITEFX_BUILD_PRECOMPILED_HEADERS) + TARGET_PRECOMPILE_HEADERS(${PROJECT_NAME} REUSE_FROM LiteFX.Core) +ENDIF(LITEFX_BUILD_PRECOMPILED_HEADERS) + +# Setup installer. +INSTALL(TARGETS ${PROJECT_NAME} EXPORT LiteFXSamples + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBRARY_DIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBRARY_DIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINARY_DIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDE_DIR} +) + +# Export config. 
+INSTALL(EXPORT LiteFXSamples DESTINATION ${CMAKE_INSTALL_EXPORT_DIR}) +EXPORT(TARGETS ${PROJECT_NAME} FILE LiteFXSamplesIndirectConfig.cmake) \ No newline at end of file diff --git a/src/Samples/Indirect/shaders/indirect_cull_cs.hlsl b/src/Samples/Indirect/shaders/indirect_cull_cs.hlsl new file mode 100644 index 00000000..0bea78df --- /dev/null +++ b/src/Samples/Indirect/shaders/indirect_cull_cs.hlsl @@ -0,0 +1,73 @@ +#pragma pack_matrix(row_major) + +struct Object +{ + float4x4 Transform; + float4 Color; + float BoundingRadius; + uint IndexCount; + uint FirstIndex; + int VertexOffset; +}; + +struct Camera +{ + float4x4 ViewProjection; + float4x4 Projection; + float4 Position; + float4 Forward; + float4 Up; + float4 Right; + float NearPlane; + float FarPlane; + float4 Frustum[6]; +}; + +struct IndirectDrawCommand +{ + uint IndexCount; + uint InstanceCount; + uint FirstIndex; + int VertexOffset; + uint FirstInstance; + uint3 Padding; +}; + +ConstantBuffer camera : register(b0, space0); +StructuredBuffer objects : register(t0, space1); + +// NOTE: Traditionally one would use an AppendStructuredBuffer here; however, binding to the counter resource is somewhat convoluted and requires separate bindings +// in Vulkan anyway. Using an RWByteAddressBuffer and explicitly calling `InterlockedAdd` is the more portable solution and should not result in performance +// degradation on modern GPUs. +// The `globallycoherent` storage class makes changes to the counter visible to all thread groups. +globallycoherent RWByteAddressBuffer drawCounter : register(u0, space2); +RWStructuredBuffer drawCommands : register(u1, space2); + +[numthreads(128, 1, 1)] +void main(uint3 id : SV_DispatchThreadID) +{ + uint instanceId = NonUniformResourceIndex(id.x); + Object object = objects.Load(instanceId); + + float3 center = object.Transform[3].xyz; // Get the object translation. + float radius = object.BoundingRadius; // Scale is ignored here at the moment...
+ + bool culled = false; + + // Cull against every frustum plane, except the far plane (which is the last one). + [unroll(5)] + for (int i = 0; i < 5; i++) + culled = culled || dot(center, camera.Frustum[i].xyz) + radius < 0; + + if (!culled) + { + // Store the command. + uint index; + drawCounter.InterlockedAdd(0, 1, index); + drawCommands[index].IndexCount = object.IndexCount; + drawCommands[index].InstanceCount = 1; + drawCommands[index].FirstIndex = object.FirstIndex; + drawCommands[index].VertexOffset = object.VertexOffset; + drawCommands[index].FirstInstance = instanceId; + } +} \ No newline at end of file diff --git a/src/Samples/Indirect/shaders/indirect_fs.hlsl b/src/Samples/Indirect/shaders/indirect_fs.hlsl new file mode 100644 index 00000000..5040528e --- /dev/null +++ b/src/Samples/Indirect/shaders/indirect_fs.hlsl @@ -0,0 +1,23 @@ +#pragma pack_matrix(row_major) + +struct VertexData +{ + float4 Position : SV_POSITION; + float4 Color : COLOR; +}; + +struct FragmentData +{ + float4 Color : SV_TARGET; + float Depth : SV_DEPTH; +}; + +FragmentData main(VertexData input) +{ + FragmentData fragment; + + fragment.Depth = input.Position.z; + fragment.Color = input.Color; + + return fragment; +} \ No newline at end of file diff --git a/src/Samples/Indirect/shaders/indirect_vs.hlsl b/src/Samples/Indirect/shaders/indirect_vs.hlsl new file mode 100644 index 00000000..3bb30008 --- /dev/null +++ b/src/Samples/Indirect/shaders/indirect_vs.hlsl @@ -0,0 +1,56 @@ +#pragma pack_matrix(row_major) + +struct VertexData +{ + float4 Position : SV_POSITION; + float4 Color : COLOR; +}; + +struct VertexInput +{ + float3 Position : POSITION; + +#ifdef __spirv__ + [[vk::builtin("BaseInstance")]] +#endif + uint ModelID : SV_StartInstanceLocation; +}; + +struct Camera +{ + float4x4 ViewProjection; + float4x4 Projection; + float4 Position; + float4 Forward; + float4 Up; + float4 Right; + float NearPlane; + float FarPlane; + float4 Frustum[6]; +}; + +struct Object +{ + float4x4 
Transform; + float4 Color; + float BoundingRadius; + uint IndexCount; + uint FirstIndex; + int VertexOffset; +}; + +ConstantBuffer camera : register(b0, space0); +StructuredBuffer objects : register(t0, space1); + +VertexData main(in VertexInput input) +{ + VertexData vertex; + Object object = objects.Load(input.ModelID); + + float4 position = mul(float4(input.Position, 1.0), object.Transform); + + vertex.Position = mul(position, camera.ViewProjection); + vertex.Color = object.Color; + + return vertex; +} \ No newline at end of file diff --git a/src/Samples/Indirect/src/main.cpp b/src/Samples/Indirect/src/main.cpp new file mode 100644 index 00000000..66e2d8b0 --- /dev/null +++ b/src/Samples/Indirect/src/main.cpp @@ -0,0 +1,165 @@ +#define LITEFX_DEFINE_GLOBAL_EXPORTS +#define LITEFX_AUTO_IMPORT_BACKEND_HEADERS +#include + +#include "sample.h" + +// CLI11 parses optional values as double by default, which yields an implicit-cast warning. +#pragma warning(disable: 4244) + +#include +#include +#include +#include + +#ifdef LITEFX_BUILD_EXAMPLES_DX12_PIX_LOADER +bool loadPixCapturer() +{ + // Check if Pix has already been loaded. + if (::GetModuleHandleW(L"WinPixGpuCapturer.dll") != 0) + return true; + + // Search for latest version of Pix. 
+ LPWSTR programFilesPath = nullptr; + ::SHGetKnownFolderPath(FOLDERID_ProgramFiles, KF_FLAG_DEFAULT, NULL, &programFilesPath); + + std::filesystem::path pixInstallationPath = programFilesPath; + pixInstallationPath /= "Microsoft PIX"; + + std::wstring newestVersionFound; + + for (auto const& directory_entry : std::filesystem::directory_iterator(pixInstallationPath)) + if (directory_entry.is_directory()) + if (newestVersionFound.empty() || newestVersionFound < directory_entry.path().filename().c_str()) + newestVersionFound = directory_entry.path().filename().c_str(); + + if (newestVersionFound.empty()) + return false; + + auto pixPath = pixInstallationPath / newestVersionFound / L"WinPixGpuCapturer.dll"; + std::wcout << "Found PIX: " << pixPath.c_str() << std::endl; + ::LoadLibraryW(pixPath.c_str()); + + return true; +} +#endif // LITEFX_BUILD_EXAMPLES_DX12_PIX_LOADER + +#ifdef LITEFX_BUILD_EXAMPLES_RENDERDOC_LOADER +RENDERDOC_API_1_5_0* renderDoc = nullptr; + +bool loadRenderDocApi() +{ + HMODULE renderDocModule = ::GetModuleHandleW(L"renderdoc.dll"); + + if (renderDocModule != 0) + { + pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)::GetProcAddress(renderDocModule, "RENDERDOC_GetAPI"); + int result = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_5_0, reinterpret_cast(&::renderDoc)); + + return result == 1; + } + + return false; +} +#endif // LITEFX_BUILD_EXAMPLES_RENDERDOC_LOADER + +int main(const int argc, const char** argv) +{ +#if WIN32 + // Enable console colors. + HANDLE console = ::GetStdHandle(STD_OUTPUT_HANDLE); + DWORD consoleMode = 0; + + if (console == INVALID_HANDLE_VALUE || !::GetConsoleMode(console, &consoleMode)) + return ::GetLastError(); + + ::SetConsoleMode(console, consoleMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING); +#endif + + // Parse the command line parameters. 
+ const String appName = SampleApp::Name(); + + CLI::App app{ "Demonstrates indirect drawing techniques.", appName }; + + Optional adapterId; + app.add_option("-a,--adapter", adapterId)->take_first(); + auto validationLayers = app.add_option("-l,--vk-validation-layers")->take_all(); + +#ifdef LITEFX_BUILD_EXAMPLES_DX12_PIX_LOADER + bool loadPix{ false }; + app.add_option("--dx-load-pix", loadPix)->take_first(); +#endif // LITEFX_BUILD_EXAMPLES_DX12_PIX_LOADER + +#ifdef LITEFX_BUILD_EXAMPLES_RENDERDOC_LOADER + bool loadRenderDoc{ false }; + app.add_option("--load-render-doc", loadRenderDoc)->take_first(); +#endif // LITEFX_BUILD_EXAMPLES_RENDERDOC_LOADER + + try + { + app.parse(argc, argv); + } + catch (const CLI::ParseError& ex) + { + return app.exit(ex); + } + +#ifdef LITEFX_BUILD_EXAMPLES_DX12_PIX_LOADER + if (loadPix && !loadPixCapturer()) + std::cout << "No PIX distribution found. Make sure you have installed PIX for Windows." << std::endl; +#endif // LITEFX_BUILD_EXAMPLES_DX12_PIX_LOADER + +#ifdef LITEFX_BUILD_EXAMPLES_RENDERDOC_LOADER + if (loadRenderDoc && !loadRenderDocApi()) + std::cout << "RenderDoc API could not be loaded. Make sure you have version 1.5 or higher installed on your system." << std::endl; +#endif // LITEFX_BUILD_EXAMPLES_RENDERDOC_LOADER + + // Turn the validation layers into a list. + Array enabledLayers; + + if (validationLayers->count() > 0) + for (const auto& result : validationLayers->results()) + enabledLayers.push_back(result); + + // Create glfw window. + if (!::glfwInit()) + throw std::runtime_error("Unable to initialize glfw."); + + ::glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API); + ::glfwWindowHint(GLFW_RESIZABLE, GLFW_TRUE); + + auto window = GlfwWindowPtr(::glfwCreateWindow(800, 600, appName.c_str(), nullptr, nullptr)); + + // Get the required Vulkan extensions from glfw. 
+ uint32_t extensions = 0; + const char** extensionNames = ::glfwGetRequiredInstanceExtensions(&extensions); + Array requiredExtensions; + + for (uint32_t i(0); i < extensions; ++i) + requiredExtensions.push_back(String(extensionNames[i])); + + // Create the app. + try + { + UniquePtr app = App::build(std::move(window), adapterId) + .logTo(LogLevel::Trace) + .logTo("sample.log", LogLevel::Debug) +#ifdef LITEFX_BUILD_VULKAN_BACKEND + .useBackend(requiredExtensions, enabledLayers) +#endif // LITEFX_BUILD_VULKAN_BACKEND +#ifdef LITEFX_BUILD_DIRECTX_12_BACKEND + .useBackend() +#endif // LITEFX_BUILD_DIRECTX_12_BACKEND + ; + + app->run(); + } + catch (const LiteFX::Exception& ex) + { + std::cerr << "\033[3;41;37mUnhandled exception: " << ex.what() << '\n' << "at: " << ex.trace() << "\033[0m" << std::endl; + + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/src/Samples/Indirect/src/sample.cpp b/src/Samples/Indirect/src/sample.cpp new file mode 100644 index 00000000..0d37519a --- /dev/null +++ b/src/Samples/Indirect/src/sample.cpp @@ -0,0 +1,542 @@ +#include "sample.h" + +#define GLM_ENABLE_EXPERIMENTAL +#include +#include + +constexpr UInt32 NUM_INSTANCES = 163840u; // 10 * 128 * 128 + +enum DescriptorSets : UInt32 +{ + PerFrame = 0, + Constant = 1, + Indirect = 2 +}; + +const Array vertices = +{ + { { -0.5f, -0.5f, 0.5f }, { 1.0f, 0.0f, 0.0f, 1.0f }, { 0.0f, 0.0f, 0.0f }, { 0.0f, 0.0f } }, + { { 0.5f, 0.5f, 0.5f }, { 0.0f, 1.0f, 0.0f, 1.0f }, { 0.0f, 0.0f, 0.0f }, { 0.0f, 0.0f } }, + { { -0.5f, 0.5f, -0.5f }, { 0.0f, 0.0f, 1.0f, 1.0f }, { 0.0f, 0.0f, 0.0f }, { 0.0f, 0.0f } }, + { { 0.5f, -0.5f, -0.5f }, { 1.0f, 1.0f, 1.0f, 1.0f }, { 0.0f, 0.0f, 0.0f }, { 0.0f, 0.0f } } +}; + +const Array indices = { 0, 2, 1, 0, 1, 3, 0, 3, 2, 1, 2, 3 }; + +struct alignas(16) CameraBuffer { + glm::mat4 ViewProjection; + glm::mat4 Projection; + glm::vec4 Position; + glm::vec4 Forward; + glm::vec4 Up; + glm::vec4 Right; + float NearPlane; + 
float FarPlane; + glm::vec2 Padding; + glm::vec4 Frustum[6]; +} camera; + +struct alignas(16) ObjectBuffer { + glm::mat4 Transform; + glm::vec4 Color; + float BoundingRadius; + UInt32 IndexCount; + UInt32 FirstIndex; + int VertexOffset; +} objects[NUM_INSTANCES]; + +template requires + meta::implements +struct FileExtensions { + static const String SHADER; +}; + +#ifdef LITEFX_BUILD_VULKAN_BACKEND +const String FileExtensions::SHADER = "spv"; +#endif // LITEFX_BUILD_VULKAN_BACKEND +#ifdef LITEFX_BUILD_DIRECTX_12_BACKEND +const String FileExtensions::SHADER = "dxi"; +#endif // LITEFX_BUILD_DIRECTX_12_BACKEND + +static constexpr inline glm::vec4 normalizePlane(const glm::vec4& plane) { + return plane / glm::length(glm::vec3(plane)); +} + +static inline void initializeObjects() { + std::srand(std::time(nullptr)); + + for (UInt32 i{ 0 }; i < NUM_INSTANCES; ++i) + { + int x = i % 128; + int y = (i / 128) % 128; + int z = i / 16384; + + // Move the center instance slightly down, so that it does not stick into the camera view. 
+ auto position = glm::vec3(x - 64, y - 64, z - 5) * 2.0f; + + if (x == 64 && y == 64 && z == 5) + position.z -= 0.35f; + + auto& instance = objects[i]; + instance.Transform = glm::translate(glm::identity(), position) * glm::eulerAngleXYZ(std::rand() / (float)RAND_MAX, std::rand() / (float)RAND_MAX, std::rand() / (float)RAND_MAX); + instance.Color = glm::vec4(std::rand() / (float)RAND_MAX, std::rand() / (float)RAND_MAX, std::rand() / (float)RAND_MAX, 1.0f); + instance.BoundingRadius = glm::length(glm::vec3(0.5f, 0.5f, 0.5f)); + instance.FirstIndex = 0; + instance.VertexOffset = 0; + instance.IndexCount = 12; + } +} + +template requires + meta::implements +void initRenderGraph(TRenderBackend* backend, SharedPtr& inputAssemblerState) +{ + using RenderPass = TRenderBackend::render_pass_type; + using RenderPipeline = TRenderBackend::render_pipeline_type; + using ComputePipeline = TRenderBackend::compute_pipeline_type; + using PipelineLayout = TRenderBackend::pipeline_layout_type; + using ShaderProgram = TRenderBackend::shader_program_type; + using InputAssembler = TRenderBackend::input_assembler_type; + using Rasterizer = TRenderBackend::rasterizer_type; + using FrameBuffer = TRenderBackend::frame_buffer_type; + + // Get the default device. + auto device = backend->device("Default"); + + // Create the frame buffers for all back buffers. + auto frameBuffers = std::views::iota(0u, device->swapChain().buffers()) | + std::views::transform([&](UInt32 index) { return device->makeFrameBuffer(std::format("Frame Buffer {0}", index), device->swapChain().renderArea()); }) | + std::ranges::to>>(); + + // Create input assembler state. 
+ SharedPtr inputAssembler = device->buildInputAssembler() + .topology(PrimitiveTopology::TriangleList) + .indexType(IndexType::UInt16) + .vertexBuffer(sizeof(Vertex), 0) + .withAttribute(0, BufferFormat::XYZ32F, offsetof(Vertex, Position), AttributeSemantic::Position) + .add(); + + inputAssemblerState = std::static_pointer_cast(inputAssembler); + + // Create a geometry render pass. + UniquePtr renderPass = device->buildRenderPass("Opaque") + .renderTarget("Color Target", RenderTargetType::Present, Format::B8G8R8A8_UNORM, RenderTargetFlags::Clear, { 0.1f, 0.1f, 0.1f, 1.f }) + .renderTarget("Depth/Stencil Target", RenderTargetType::DepthStencil, Format::D32_SFLOAT, RenderTargetFlags::Clear, { 1.f, 0.f, 0.f, 0.f }); + + // Map all render targets to the frame buffer. + std::ranges::for_each(frameBuffers, [&renderPass](auto& frameBuffer) { frameBuffer->addImages(renderPass->renderTargets()); }); + + // Create the shader program. + SharedPtr shaderProgram = device->buildShaderProgram() + .withVertexShaderModule("shaders/indirect_vs." + FileExtensions::SHADER) + .withFragmentShaderModule("shaders/indirect_fs." + FileExtensions::SHADER); + + // Create a render pipeline. + UniquePtr renderPipeline = device->buildRenderPipeline(*renderPass, "Geometry") + .inputAssembler(inputAssembler) + .rasterizer(device->buildRasterizer() + .polygonMode(PolygonMode::Solid) + .cullMode(CullMode::BackFaces) + .cullOrder(CullOrder::ClockWise) + .lineWidth(1.f) + .depthState(DepthStencilState::DepthState{ .Operation = CompareOperation::LessEqual })) + .layout(shaderProgram->reflectPipelineLayout()) + .shaderProgram(shaderProgram); + + // Create culling pre-pass pipeline. + SharedPtr cullProgram = device->buildShaderProgram() + .withComputeShaderModule("shaders/indirect_cull_cs." 
+ FileExtensions::SHADER); + + UniquePtr cullPipeline = device->buildComputePipeline("Cull") + .layout(cullProgram->reflectPipelineLayout()) + .shaderProgram(cullProgram); + + // Add the resources to the device state. + device->state().add(std::move(renderPass)); + device->state().add(std::move(renderPipeline)); + device->state().add(std::move(cullPipeline)); + std::ranges::for_each(frameBuffers, [device](auto& frameBuffer) { device->state().add(std::move(frameBuffer)); }); +} + +void SampleApp::initBuffers(IRenderBackend* backend) +{ + // Get a command buffer + auto commandBuffer = m_device->defaultQueue(QueueType::Transfer).createCommandBuffer(true); + + // Create the staging buffer. + // NOTE: The mapping works, because vertex and index buffers have an alignment of 0, so we can treat the whole buffer as a single element the size of the + // whole buffer. + auto stagedVertices = m_device->factory().createVertexBuffer(*m_inputAssembler->vertexBufferLayout(0), ResourceHeap::Staging, vertices.size()); + stagedVertices->map(vertices.data(), vertices.size() * sizeof(::Vertex), 0); + + // Create the actual vertex buffer and transfer the staging buffer into it. + auto vertexBuffer = m_device->factory().createVertexBuffer("Vertex Buffer", *m_inputAssembler->vertexBufferLayout(0), ResourceHeap::Resource, vertices.size()); + commandBuffer->transfer(asShared(std::move(stagedVertices)), *vertexBuffer, 0, 0, vertices.size()); + + // Create the staging buffer for the indices. For infos about the mapping see the note about the vertex buffer mapping above. + auto stagedIndices = m_device->factory().createIndexBuffer(*m_inputAssembler->indexBufferLayout(), ResourceHeap::Staging, indices.size()); + stagedIndices->map(indices.data(), indices.size() * m_inputAssembler->indexBufferLayout()->elementSize(), 0); + + // Create the actual index buffer and transfer the staging buffer into it. 
+ auto indexBuffer = m_device->factory().createIndexBuffer("Index Buffer", *m_inputAssembler->indexBufferLayout(), ResourceHeap::Resource, indices.size()); + commandBuffer->transfer(asShared(std::move(stagedIndices)), *indexBuffer, 0, 0, indices.size()); + + // Initialize the camera buffer. + // NOTE: Since we bind the same resource to pipelines of different type (compute and graphics), we need two descriptor sets targeting the same buffers. + auto& cullPipeline = m_device->state().pipeline("Cull"); + auto& geometryPipeline = m_device->state().pipeline("Geometry"); + auto& cameraCullBindingLayout = cullPipeline.layout()->descriptorSet(DescriptorSets::PerFrame); + auto& cameraGeometryBindingLayout = geometryPipeline.layout()->descriptorSet(DescriptorSets::PerFrame); + auto cameraBuffer = m_device->factory().createBuffer("Camera Buffer", cameraGeometryBindingLayout, 0, ResourceHeap::Dynamic, 3); + auto cameraCullBindings = cameraCullBindingLayout.allocateMultiple(3, { + { { .resource = *cameraBuffer, .firstElement = 0, .elements = 1 } }, + { { .resource = *cameraBuffer, .firstElement = 1, .elements = 1 } }, + { { .resource = *cameraBuffer, .firstElement = 2, .elements = 1 } } + }); + auto cameraGeometryBindings = cameraGeometryBindingLayout.allocateMultiple(3, { + { { .resource = *cameraBuffer, .firstElement = 0, .elements = 1 } }, + { { .resource = *cameraBuffer, .firstElement = 1, .elements = 1 } }, + { { .resource = *cameraBuffer, .firstElement = 2, .elements = 1 } } + }); + + // Next, we create the objects buffer. 
+ auto& objectsCullBindingLayout = cullPipeline.layout()->descriptorSet(DescriptorSets::Constant); + auto& objectsGeometryBindingLayout = geometryPipeline.layout()->descriptorSet(DescriptorSets::Constant); + auto objectsStagingBuffer = m_device->factory().createBuffer(objectsGeometryBindingLayout, 0, ResourceHeap::Staging, sizeof(ObjectBuffer) * NUM_INSTANCES, 1); + auto objectsBuffer = m_device->factory().createBuffer("Objects Buffer", objectsGeometryBindingLayout, 0, ResourceHeap::Resource, sizeof(ObjectBuffer) * NUM_INSTANCES, 1); + auto objectsCullBinding = objectsCullBindingLayout.allocate({ { .resource = *objectsBuffer } }); + auto objectsGeometryBinding = objectsGeometryBindingLayout.allocate({ { .resource = *objectsBuffer } }); + + objectsStagingBuffer->map(objects, sizeof(ObjectBuffer) * NUM_INSTANCES); + commandBuffer->transfer(asShared(std::move(objectsStagingBuffer)), *objectsBuffer); + + // Create a buffer for recording the indirect draw calls. + // + // NOTE: Reflection cannot determine, that the buffer records indirect commands, so we need to explicitly state the usage. + // NOTE: We allocate a fourth element in the counter variable, which will always be zeroed out and acts as a copy source for resetting the counter. 
+ auto& indirectBindingLayout = cullPipeline.layout()->descriptorSet(DescriptorSets::Indirect); + auto indirectCounterBuffer = m_device->factory().createBuffer("Indirect Counter", BufferType::Indirect, ResourceHeap::Resource, sizeof(UInt32), 4, ResourceUsage::Default | ResourceUsage::AllowWrite); + auto indirectCommandsBuffer = m_device->factory().createBuffer("Indirect Commands", BufferType::Indirect, ResourceHeap::Resource, sizeof(IndirectIndexedBatch) * NUM_INSTANCES, 3, ResourceUsage::AllowWrite); + auto indirectBindings = indirectBindingLayout.allocateMultiple(3, { + { { .resource = *indirectCounterBuffer, .firstElement = 0, .elements = 1 }, { .resource = *indirectCommandsBuffer, .firstElement = 0, .elements = 1 } }, + { { .resource = *indirectCounterBuffer, .firstElement = 1, .elements = 1 }, { .resource = *indirectCommandsBuffer, .firstElement = 1, .elements = 1 } }, + { { .resource = *indirectCounterBuffer, .firstElement = 2, .elements = 1 }, { .resource = *indirectCommandsBuffer, .firstElement = 2, .elements = 1 } } + }); + + // End and submit the command buffer. + m_transferFence = commandBuffer->submit(); + + // Add everything to the state. 
+ m_device->state().add(std::move(vertexBuffer)); + m_device->state().add(std::move(indexBuffer)); + m_device->state().add(std::move(cameraBuffer)); + m_device->state().add(std::move(objectsBuffer)); + m_device->state().add(std::move(indirectCounterBuffer)); + m_device->state().add(std::move(indirectCommandsBuffer)); + m_device->state().add("Objects Cull Bindings", std::move(objectsCullBinding)); + m_device->state().add("Objects Geometry Bindings", std::move(objectsGeometryBinding)); + std::ranges::for_each(cameraCullBindings, [this, i = 0](auto& binding) mutable { m_device->state().add(std::format("Camera Cull Bindings {0}", i++), std::move(binding)); }); + std::ranges::for_each(cameraGeometryBindings, [this, i = 0](auto& binding) mutable { m_device->state().add(std::format("Camera Geometry Bindings {0}", i++), std::move(binding)); }); + std::ranges::for_each(indirectBindings, [this, i = 0](auto& binding) mutable { m_device->state().add(std::format("Indirect Bindings {0}", i++), std::move(binding)); }); +} + +void SampleApp::updateCamera(IBuffer& buffer, UInt32 backBuffer) const +{ + // Store the initial time this method has been called first. + static auto start = std::chrono::high_resolution_clock::now(); + auto now = std::chrono::high_resolution_clock::now(); + auto time = std::chrono::duration(now - start).count(); + const float speed = 0.3f; + + glm::vec3 position = { 0.0f, 0.0f, 0.35f }; + glm::vec3 target = { std::sinf(time * speed), std::cosf(time * speed), 0.0f }; + glm::vec3 forward = glm::normalize(target - position); + glm::vec3 right = glm::normalize(glm::cross({ 0.0f, 0.0f, 1.0f }, forward)); + glm::vec3 up = glm::normalize(glm::cross(forward, right)); + const float nearPlane = 0.0001f, farPlane = 1000.0f; + + // Calculate the camera view/projection matrix. 
+ auto aspectRatio = m_viewport->getRectangle().width() / m_viewport->getRectangle().height(); + glm::mat4 view = glm::lookAt(position, target, up); + glm::mat4 projection = glm::perspective(glm::radians(60.0f), aspectRatio, nearPlane, farPlane); + camera.ViewProjection = projection * view; + camera.Projection = projection; + camera.Position = glm::vec4(position, 1.0f); + camera.Forward = glm::vec4(forward, 1.0f); + camera.Up = glm::vec4(up, 1.0f); + camera.Right = glm::vec4(right, 1.0f); + camera.NearPlane = nearPlane; + camera.FarPlane = farPlane; + + // Compute frustum side planes. + auto projectionTransposed = glm::transpose(camera.ViewProjection); // GLM uses column-major matrices, transpose lets us index rows. + camera.Frustum[0] = ::normalizePlane(projectionTransposed[3] + projectionTransposed[0]); // Left + camera.Frustum[1] = ::normalizePlane(projectionTransposed[3] - projectionTransposed[0]); // Right + camera.Frustum[2] = ::normalizePlane(projectionTransposed[3] + projectionTransposed[1]); // Bottom + camera.Frustum[3] = ::normalizePlane(projectionTransposed[3] - projectionTransposed[1]); // Top + camera.Frustum[4] = ::normalizePlane(projectionTransposed[3] + projectionTransposed[2]); // Near + camera.Frustum[5] = ::normalizePlane(projectionTransposed[3] - projectionTransposed[2]); // Far + + // Create a staging buffer and use to transfer the new uniform buffer to. + buffer.map(reinterpret_cast(&camera), sizeof(camera), backBuffer); +} + +void SampleApp::onStartup() +{ + // Run application loop until the window is closed. + while (!::glfwWindowShouldClose(m_window.get())) + { + this->handleEvents(); + this->drawFrame(); + this->updateWindowTitle(); + } +} + +void SampleApp::onShutdown() +{ + // Destroy the window. 
+ ::glfwDestroyWindow(m_window.get()); + ::glfwTerminate(); +} + +void SampleApp::onInit() +{ + ::glfwSetWindowUserPointer(m_window.get(), this); + + ::glfwSetFramebufferSizeCallback(m_window.get(), [](GLFWwindow* window, int width, int height) { + auto app = reinterpret_cast(::glfwGetWindowUserPointer(window)); + app->resize(width, height); + }); + + ::glfwSetKeyCallback(m_window.get(), [](GLFWwindow* window, int key, int scancode, int action, int mods) { + auto app = reinterpret_cast(::glfwGetWindowUserPointer(window)); + app->keyDown(key, scancode, action, mods); + }); + + // Initialize objects. + ::initializeObjects(); + + // Create a callback for backend startup and shutdown. + auto startCallback = [this](TBackend * backend) { + // Store the window handle. + auto window = m_window.get(); + + // Get the proper frame buffer size. + int width, height; + ::glfwGetFramebufferSize(window, &width, &height); + + // Create viewport and scissors. + m_viewport = makeShared(RectF(0.f, 0.f, static_cast(width), static_cast(height))); + m_scissor = makeShared(RectF(0.f, 0.f, static_cast(width), static_cast(height))); + + auto adapter = backend->findAdapter(m_adapterId); + + if (adapter == nullptr) + adapter = backend->findAdapter(std::nullopt); + + auto surface = backend->createSurface(::glfwGetWin32Window(window)); + + // Create the device. + m_device = backend->createDevice("Default", *adapter, std::move(surface), Format::B8G8R8A8_UNORM, Size2d(static_cast(width), static_cast(height)), 3, false, GraphicsDeviceFeatures { .DrawIndirect = true }); + + // Initialize resources. + ::initRenderGraph(backend, m_inputAssembler); + this->initBuffers(backend); + + return true; + }; + + auto stopCallback = [](TBackend * backend) { + backend->releaseDevice("Default"); + }; + +#ifdef LITEFX_BUILD_VULKAN_BACKEND + // Register the Vulkan backend de-/initializer. 
+ this->onBackendStart(startCallback); + this->onBackendStop(stopCallback); +#endif // LITEFX_BUILD_VULKAN_BACKEND + +#ifdef LITEFX_BUILD_DIRECTX_12_BACKEND + // We do not need to provide a root signature for shader reflection (refer to the project wiki for more information: https://github.com/crud89/LiteFX/wiki/Shader-Development). + DirectX12ShaderProgram::suppressMissingRootSignatureWarning(); + + // Register the DirectX 12 backend de-/initializer. + this->onBackendStart(startCallback); + this->onBackendStop(stopCallback); +#endif // LITEFX_BUILD_DIRECTX_12_BACKEND +} + +void SampleApp::onResize(const void* sender, ResizeEventArgs e) +{ + // In order to re-create the swap chain, we need to wait for all frames in flight to finish. + m_device->wait(); + + // Resize the frame buffer and recreate the swap chain. + auto surfaceFormat = m_device->swapChain().surfaceFormat(); + auto renderArea = Size2d(e.width(), e.height()); + auto vsync = m_device->swapChain().verticalSynchronization(); + m_device->swapChain().reset(surfaceFormat, renderArea, 3, vsync); + + // Resize the frame buffers. Note that we could also use an event handler on the swap chain `reseted` event to do this automatically instead. + m_device->state().frameBuffer("Frame Buffer 0").resize(renderArea); + m_device->state().frameBuffer("Frame Buffer 1").resize(renderArea); + m_device->state().frameBuffer("Frame Buffer 2").resize(renderArea); + + // Also resize viewport and scissor. 
+ m_viewport->setRectangle(RectF(0.f, 0.f, static_cast(e.width()), static_cast(e.height()))); + m_scissor->setRectangle(RectF(0.f, 0.f, static_cast(e.width()), static_cast(e.height()))); +} + +void SampleApp::keyDown(int key, int scancode, int action, int mods) +{ +#ifdef LITEFX_BUILD_VULKAN_BACKEND + if (key == GLFW_KEY_F9 && action == GLFW_PRESS) + this->startBackend(); +#endif // LITEFX_BUILD_VULKAN_BACKEND + +#ifdef LITEFX_BUILD_DIRECTX_12_BACKEND + if (key == GLFW_KEY_F10 && action == GLFW_PRESS) + this->startBackend(); +#endif // LITEFX_BUILD_DIRECTX_12_BACKEND + + if (key == GLFW_KEY_F8 && action == GLFW_PRESS) + { + static RectI windowRect; + + // Check if we're switching from fullscreen to windowed or the other way around. + if (::glfwGetWindowMonitor(m_window.get()) == nullptr) + { + // Find the monitor, that contains most of the window. + RectI clientRect, monitorRect; + GLFWmonitor* currentMonitor = nullptr; + const GLFWvidmode* currentVideoMode = nullptr; + int monitorCount; + + ::glfwGetWindowPos(m_window.get(), &clientRect.x(), &clientRect.y()); + ::glfwGetWindowSize(m_window.get(), &clientRect.width(), &clientRect.height()); + auto monitors = ::glfwGetMonitors(&monitorCount); + int highestOverlap = 0; + + for (int i(0); i < monitorCount; ++i) + { + auto monitor = monitors[i]; + auto mode = ::glfwGetVideoMode(monitor); + ::glfwGetMonitorPos(monitor, &monitorRect.x(), &monitorRect.y()); + monitorRect.width() = mode->width; + monitorRect.height() = mode->height; + + auto overlap = + std::max(0, std::min(clientRect.x() + clientRect.width(), monitorRect.x() + monitorRect.width()) - std::max(clientRect.x(), monitorRect.x())) * + std::max(0, std::min(clientRect.y() + clientRect.height(), monitorRect.y() + monitorRect.height()) - std::max(clientRect.y(), monitorRect.y())); + + if (highestOverlap < overlap) + { + highestOverlap = overlap; + currentMonitor = monitor; + currentVideoMode = mode; + } + } + + // Save the current window rect in order to restore it 
later. + windowRect = clientRect; + + // Switch to fullscreen. + if (currentVideoMode != nullptr) + ::glfwSetWindowMonitor(m_window.get(), currentMonitor, 0, 0, currentVideoMode->width, currentVideoMode->height, currentVideoMode->refreshRate); + } + else + { + // NOTE: If we were to launch in fullscreen mode, we should use something like `max(windowRect.width(), defaultWidth)`. + ::glfwSetWindowMonitor(m_window.get(), nullptr, windowRect.x(), windowRect.y(), windowRect.width(), windowRect.height(), 0); + } + } + + if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS) + { + // Close the window with the next loop. + ::glfwSetWindowShouldClose(m_window.get(), GLFW_TRUE); + } +} + +void SampleApp::updateWindowTitle() +{ + static auto lastTime = std::chrono::high_resolution_clock::now(); + auto frameTime = std::chrono::duration(std::chrono::high_resolution_clock::now() - lastTime).count(); + + std::stringstream title; + title << this->name() << " | " << "Backend: " << this->activeBackend(BackendType::Rendering)->name() << " | " << static_cast(1000.0f / frameTime) << " FPS"; + + ::glfwSetWindowTitle(m_window.get(), title.str().c_str()); + lastTime = std::chrono::high_resolution_clock::now(); +} + +void SampleApp::handleEvents() +{ + ::glfwPollEvents(); +} + +void SampleApp::drawFrame() +{ + // Swap the back buffers for the next frame. + auto backBuffer = m_device->swapChain().swapBackBuffer(); + + // Query state. For performance reasons, those state variables should be cached for more complex applications, instead of looking them up every frame. 
+ auto& frameBuffer = m_device->state().frameBuffer(std::format("Frame Buffer {0}", backBuffer)); + auto& renderPass = m_device->state().renderPass("Opaque"); + auto& geometryPipeline = m_device->state().pipeline("Geometry"); + auto& cullPipeline = m_device->state().pipeline("Cull"); + auto& cameraBuffer = m_device->state().buffer("Camera Buffer"); + auto& cameraGeometryBindings = m_device->state().descriptorSet(std::format("Camera Geometry Bindings {0}", backBuffer)); + auto& cameraCullBindings = m_device->state().descriptorSet(std::format("Camera Cull Bindings {0}", backBuffer)); + auto& indirectCounterBuffer = m_device->state().buffer("Indirect Counter"); + auto& indirectCommandsBuffer = m_device->state().buffer("Indirect Commands"); + auto& indirectBindings = m_device->state().descriptorSet(std::format("Indirect Bindings {0}", backBuffer)); + auto& vertexBuffer = m_device->state().vertexBuffer("Vertex Buffer"); + auto& indexBuffer = m_device->state().indexBuffer("Index Buffer"); + auto& objectsGeometryBindings = m_device->state().descriptorSet("Objects Geometry Bindings"); + auto& objectsCullBindings = m_device->state().descriptorSet("Objects Cull Bindings"); + + // Wait for all transfers to finish. + auto& queue = renderPass.commandQueue(); + queue.waitFor(m_device->defaultQueue(QueueType::Transfer), m_transferFence); + + // Create a command buffer to execute the cull pass on. + auto cullCommands = queue.createCommandBuffer(true); + + // Start by updating the camera. + this->updateCamera(cameraBuffer, backBuffer); + + // Clear the counter. + cullCommands->transfer(indirectCounterBuffer, indirectCounterBuffer, 3, backBuffer); + + // Bind cull pipeline and all descriptor sets. + cullCommands->use(cullPipeline); + cullCommands->bind(cameraCullBindings); + cullCommands->bind(objectsCullBindings); + cullCommands->bind(indirectBindings); + + // Dispatch cull pass. + cullCommands->dispatch({ NUM_INSTANCES / 128, 1, 1 }); + + // Submit the cull pass commands. 
+ queue.submit(cullCommands); + + // Begin rendering on the render pass and use the only pipeline we've created for it. + renderPass.begin(frameBuffer); + auto commandBuffer = renderPass.commandBuffer(0); + commandBuffer->use(geometryPipeline); + commandBuffer->setViewports(m_viewport.get()); + commandBuffer->setScissors(m_scissor.get()); + + // Bind both descriptor sets to the pipeline. + commandBuffer->bind(cameraGeometryBindings); + commandBuffer->bind(objectsGeometryBindings); + + // Bind the vertex and index buffers. + commandBuffer->bind(vertexBuffer); + commandBuffer->bind(indexBuffer); + + // Draw the object and present the frame by ending the render pass. + commandBuffer->drawIndexedIndirect(indirectCommandsBuffer, indirectCounterBuffer, backBuffer * indirectCommandsBuffer.alignedElementSize(), backBuffer * indirectCounterBuffer.alignedElementSize()); + renderPass.end(); +} \ No newline at end of file diff --git a/src/Samples/Indirect/src/sample.h b/src/Samples/Indirect/src/sample.h new file mode 100644 index 00000000..503b4a63 --- /dev/null +++ b/src/Samples/Indirect/src/sample.h @@ -0,0 +1,113 @@ +#pragma once + +#define LITEFX_AUTO_IMPORT_BACKEND_HEADERS +#include + +#if (defined _WIN32 || defined WINCE) +# define GLFW_EXPOSE_NATIVE_WIN32 +#else +# pragma message ("Indirect Sample: No supported surface platform detected.") +#endif + +#include +#include +#include + +#include "config.h" + +#ifdef LITEFX_BUILD_EXAMPLES_RENDERDOC_LOADER +#include + +extern RENDERDOC_API_1_5_0* renderDoc; +#endif + +using namespace LiteFX; +using namespace LiteFX::Rendering; +using namespace LiteFX::Rendering::Backends; + +struct GlfwWindowDeleter { + void operator()(GLFWwindow* ptr) noexcept { + ::glfwDestroyWindow(ptr); + } +}; + +typedef UniquePtr GlfwWindowPtr; + +class SampleApp : public LiteFX::App { +public: + static String Name() noexcept { return "LiteFX Sample: Indirect Drawing"; } + String name() const noexcept override { return Name(); } + + static AppVersion 
Version() noexcept { return AppVersion(1, 0, 0, 0); } + AppVersion version() const noexcept override { return Version(); } + +private: + /// + /// Stores the GLFW window pointer. + /// + GlfwWindowPtr m_window; + + /// + /// Stores the preferred adapter ID (std::nullopt, if the default adapter is used). + /// + Optional m_adapterId; + + /// + /// Stores a reference of the input assembler state. + /// + SharedPtr m_inputAssembler; + + /// + /// Stores the viewport. + /// + SharedPtr m_viewport; + + /// + /// Stores the scissor. + /// + SharedPtr m_scissor; + + /// + /// Stores a pointer to the currently active device. + /// + IGraphicsDevice* m_device; + + /// + /// Stores the fence created at application load time. + /// + UInt64 m_transferFence = 0; + +public: + SampleApp(GlfwWindowPtr&& window, Optional adapterId) : + App(), m_window(std::move(window)), m_adapterId(adapterId), m_device(nullptr) + { + this->initializing += std::bind(&SampleApp::onInit, this); + this->startup += std::bind(&SampleApp::onStartup, this); + this->resized += std::bind(&SampleApp::onResize, this, std::placeholders::_1, std::placeholders::_2); + this->shutdown += std::bind(&SampleApp::onShutdown, this); + } + +private: + /// + /// Initializes the buffers. + /// + /// The render backend to use. + void initBuffers(IRenderBackend* backend); + + /// + /// Updates the camera buffer. This needs to be done whenever the frame buffer changes, since we need to pass changes in the aspect ratio to the view/projection matrix. 
+ /// + void updateCamera(IBuffer& buffer, UInt32 backBuffer) const; + +private: + void onInit(); + void onStartup(); + void onShutdown(); + void onResize(const void* sender, ResizeEventArgs e); + +public: + void keyDown(int key, int scancode, int action, int mods); + void handleEvents(); + void drawFrame(); + void updateWindowTitle(); +}; \ No newline at end of file diff --git a/src/Samples/Textures/src/sample.cpp b/src/Samples/Textures/src/sample.cpp index ad999e6b..c813ab8c 100644 --- a/src/Samples/Textures/src/sample.cpp +++ b/src/Samples/Textures/src/sample.cpp @@ -138,7 +138,7 @@ void loadTexture(TDevice& device, UniquePtr& texture, UniquePtrbarrier(*barrier);