Skip to content

Commit

Permalink
Adding support for limiting ommArraySize (#16)
Browse files Browse the repository at this point in the history
  • Loading branch information
nv-jdeligiannis committed Feb 7, 2023
1 parent 07a79bf commit ebb7ee9
Show file tree
Hide file tree
Showing 15 changed files with 279 additions and 160 deletions.
2 changes: 1 addition & 1 deletion integration/omm-sdk-nvrhi/omm-sdk-nvrhi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -649,8 +649,8 @@ omm::Gpu::DispatchConfigDesc GpuBakeNvrhiImpl::GetConfig(const GpuBakeNvrhi::Inp
config.indexCount = (uint32_t)params.numIndices;
config.globalFormat = params.format == nvrhi::rt::OpacityMicromapFormat::OC1_2_State ? Format::OC1_2_State : Format::OC1_4_State;
config.maxScratchMemorySize = params.minimalMemoryMode ? Gpu::ScratchMemoryBudget::MB_4 : Gpu::ScratchMemoryBudget::MB_256;
config.maxOutOmmArraySize = params.maxOutOmmArraySize;
config.maxSubdivisionLevel = params.maxSubdivisionLevel;
config.globalSubdivisionLevel = params.maxSubdivisionLevel;
config.dynamicSubdivisionScale = params.dynamicSubdivisionScale;
return config;
}
Expand Down
1 change: 1 addition & 0 deletions integration/omm-sdk-nvrhi/omm-sdk-nvrhi.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ namespace omm
uint32_t numIndices = 0;

uint32_t maxSubdivisionLevel = 0;
uint32_t maxOutOmmArraySize = 0xFFFFFFFF;
nvrhi::rt::OpacityMicromapFormat format = nvrhi::rt::OpacityMicromapFormat::OC1_4_State;
float dynamicSubdivisionScale = 0.5f;
bool minimalMemoryMode = false;
Expand Down
19 changes: 11 additions & 8 deletions omm-sdk/include/omm.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.

#define OMM_VERSION_MAJOR 0
#define OMM_VERSION_MINOR 9
#define OMM_VERSION_BUILD 1
#define OMM_VERSION_BUILD 2

#if defined(_MSC_VER)
#define OMM_CALL __fastcall
Expand Down Expand Up @@ -788,7 +788,7 @@ typedef struct ommGpuPreDispatchInfo
uint32_t outOmmIndexCount;
// Min required size of OUT_OMM_ARRAY_DATA. GetBakeInfo returns most conservative estimation while less conservative number
// can be obtained via BakePrepass
size_t outOmmArraySizeInBytes;
uint32_t outOmmArraySizeInBytes;
// Min required size of OUT_OMM_DESC_ARRAY. GetBakeInfo returns most conservative estimation while less conservative number
// can be obtained via BakePrepass
uint32_t outOmmDescSizeInBytes;
Expand Down Expand Up @@ -848,12 +848,15 @@ typedef struct ommGpuDispatchConfigDesc
float dynamicSubdivisionScale;
// The global Format. May be overridden by the per-triangle config.
ommFormat globalFormat;
// Micro triangle count is 4^N, where N is the subdivision level. Subdivision level must be in range [0,
// MaxSubdivisionLevel]. The global subdivisionLevel. May be overriden by the per-triangle subdivision level setting. The
// subdivision level to allow in dynamic mode and value is used to allocate appropriate scratch memory.
uint8_t globalSubdivisionLevel;
uint8_t maxSubdivisionLevel;
uint8_t enableSubdivisionLevelBuffer;
ommBool enableSubdivisionLevelBuffer;
// The SDK will try to limit the omm array size reported in PreDispatchInfo::outOmmArraySizeInBytes and
// PostBakeInfo::outOmmArraySizeInBytes to this value (in bytes).
// Currently a greedy algorithm is implemented with a first-come, first-served order.
// The SDK may (or may not) apply more sophisticated heuristics in the future.
// If no memory is available to allocate an OMM Array Block the state will default to Unknown Opaque (ignoring any bake
// flags to disable special indices).
uint32_t maxOutOmmArraySize;
// Target scratch memory budget. The SDK will try to adjust the sum of the transient pool buffers to match this value. A
// higher budget executes the baking operation more efficiently. May return INSUFFICIENT_SCRATCH_MEMORY if set too low.
ommGpuScratchMemoryBudget maxScratchMemorySize;
Expand All @@ -877,9 +880,9 @@ inline ommGpuDispatchConfigDesc ommGpuDispatchConfigDescDefault()
v.alphaCutoff = 0.5f;
v.dynamicSubdivisionScale = 2;
v.globalFormat = ommFormat_OC1_4_State;
v.globalSubdivisionLevel = 4;
v.maxSubdivisionLevel = 8;
v.enableSubdivisionLevelBuffer = 0;
v.maxOutOmmArraySize = 0xFFFFFFFF;
v.maxScratchMemorySize = ommGpuScratchMemoryBudget_Default;
return v;
}
Expand Down
15 changes: 9 additions & 6 deletions omm-sdk/include/omm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ namespace omm
uint32_t outOmmIndexCount = 0xFFFFFFFF;
// Min required size of OUT_OMM_ARRAY_DATA. GetBakeInfo returns most conservative estimation while less conservative number
// can be obtained via BakePrepass
size_t outOmmArraySizeInBytes = 0xFFFFFFFF;
uint32_t outOmmArraySizeInBytes = 0xFFFFFFFF;
// Min required size of OUT_OMM_DESC_ARRAY. GetBakeInfo returns most conservative estimation while less conservative number
// can be obtained via BakePrepass
uint32_t outOmmDescSizeInBytes = 0xFFFFFFFF;
Expand Down Expand Up @@ -728,12 +728,15 @@ namespace omm
float dynamicSubdivisionScale = 2;
// The global Format. May be overridden by the per-triangle config.
Format globalFormat = Format::OC1_4_State;
// Micro triangle count is 4^N, where N is the subdivision level. Subdivision level must be in range [0,
// MaxSubdivisionLevel]. The global subdivisionLevel. May be overriden by the per-triangle subdivision level setting. The
// subdivision level to allow in dynamic mode and value is used to allocate appropriate scratch memory.
uint8_t globalSubdivisionLevel = 4;
uint8_t maxSubdivisionLevel = 8;
uint8_t enableSubdivisionLevelBuffer = 0;
bool enableSubdivisionLevelBuffer = false;
// The SDK will try to limit the omm array size reported in PreDispatchInfo::outOmmArraySizeInBytes and
// PostBakeInfo::outOmmArraySizeInBytes to this value (in bytes).
// Currently a greedy algorithm is implemented with a first-come, first-served order.
// The SDK may (or may not) apply more sophisticated heuristics in the future.
// If no memory is available to allocate an OMM Array Block the state will default to Unknown Opaque (ignoring any bake
// flags to disable special indices).
uint32_t maxOutOmmArraySize = 0xFFFFFFFF;
// Target scratch memory budget. The SDK will try to adjust the sum of the transient pool buffers to match this value. A
// higher budget executes the baking operation more efficiently. May return INSUFFICIENT_SCRATCH_MEMORY if set too low.
ScratchMemoryBudget maxScratchMemorySize = ScratchMemoryBudget::Default;
Expand Down
18 changes: 9 additions & 9 deletions omm-sdk/scripts/omm.json
Original file line number Diff line number Diff line change
Expand Up @@ -1824,21 +1824,21 @@
},
"comment": "The global Format. May be overriden by the per-triangle config."
},
{
"type": "uint8_t",
"name": "globalSubdivisionLevel",
"value": "4",
"comment": "Micro triangle count is 4^N, where N is the subdivision level. Subdivision level must be in range [0, MaxSubdivisionLevel]. The global subdivisionLevel. May be overriden by the per-triangle subdivision level setting. The subdivision level to allow in dynamic mode and value is used to allocate appropriate scratch memory."
},
{
"type": "uint8_t",
"name": "maxSubdivisionLevel",
"value": "8"
},
{
"type": "uint8_t",
"type": "bool",
"name": "enableSubdivisionLevelBuffer",
"value": "0"
"value": "false"
},
{
"type": "uint32_t",
"name": "maxOutOmmArraySize",
"value": "0xFFFFFFFF",
"comment": "The SDK will try to limit the omm array size of PreDispatchInfo::outOmmArraySizeInBytes and PostBakeInfo::outOmmArraySizeInBytes.\nCurrently a greedy algorithm is implemented with a first come-first serve order.\nThe SDK may (or may not) apply more sophisticated heuristics in the future.\nIf no memory is available to allocate an OMM Array Block the state will default to Unknown Opaque (ignoring any bake flags do disable special indices)."
},
{
"type": "ScratchMemoryBudget",
Expand Down Expand Up @@ -1915,7 +1915,7 @@
"value": "0xFFFFFFFF"
},
{
"type": "size_t",
"type": "uint32_t",
"name": "outOmmArraySizeInBytes",
"value": "0xFFFFFFFF",
"comment": "Min required size of OUT_OMM_ARRAY_DATA. GetBakeInfo returns most conservative estimation while less conservative number can be obtained via BakePrepass"
Expand Down
2 changes: 1 addition & 1 deletion omm-sdk/scripts/omm_header_c.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.

#define OMM_VERSION_MAJOR 0
#define OMM_VERSION_MINOR 9
#define OMM_VERSION_BUILD 1
#define OMM_VERSION_BUILD 2

#if defined(_MSC_VER)
#define OMM_CALL __fastcall
Expand Down
2 changes: 1 addition & 1 deletion omm-sdk/shaders/omm_desc_patch.cs.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ uint GetSourcePrimitiveIndex(uint primitiveIndex)

if (primitiveIndexOrHashTableEntryIndex < -4)
{
const uint hashTableEntryIndex = -(primitiveIndexOrHashTableEntryIndex + 4);
const uint hashTableEntryIndex = -(primitiveIndexOrHashTableEntryIndex + 5);
const uint primitiveIndexRef = OMM_SUBRESOURCE_LOAD(HashTableBuffer, 8 * hashTableEntryIndex + 4); // [hash|primitiveIndex]
return primitiveIndexRef;
}
Expand Down
5 changes: 1 addition & 4 deletions omm-sdk/shaders/omm_global_cb.hlsli
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@ and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.
*/


#define OMM_DECLARE_GLOBAL_CONSTANT_BUFFER \
OMM_CONSTANTS_START(GlobalConstants) \
OMM_CONSTANT(uint, IndexCount) \
OMM_CONSTANT(uint, PrimitiveCount) \
OMM_CONSTANT(uint, MaxBatchCount) \
OMM_CONSTANT(uint, GlobalSubdivisionLevel) \
OMM_CONSTANT(uint, MaxOutOmmArraySize) \
\
OMM_CONSTANT(uint, IsOmmIndexFormat16bit) \
OMM_CONSTANT(uint, EnableSpecialIndices) \
Expand Down Expand Up @@ -45,7 +43,6 @@ OMM_CONSTANTS_START(GlobalConstants) \
\
OMM_CONSTANT(float2, TexSize) \
OMM_CONSTANT(float2, InvTexSize) \
\
/* ---- Buffer offsets go here */\
\
OMM_CONSTANT(uint, IEBakeBufferOffset) \
Expand Down
2 changes: 1 addition & 1 deletion omm-sdk/shaders/omm_post_build_info.cs.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@ void main(uint3 tid : SV_DispatchThreadID)
const uint ommDescCount = OMM_SUBRESOURCE_LOAD(OmmDescAllocatorCounterBuffer, 0);
const uint ommDescByteSize = ommDescCount * 8;

u_postBuildInfo.Store(0, ommArrayByteSize);
u_postBuildInfo.Store(0, min(ommArrayByteSize, g_GlobalConstants.MaxOutOmmArraySize));
u_postBuildInfo.Store(4, ommDescByteSize);
}
16 changes: 11 additions & 5 deletions omm-sdk/shaders/omm_work_setup_common.hlsli
Original file line number Diff line number Diff line change
Expand Up @@ -203,19 +203,25 @@ uint GetSubdivisionLevel(TexCoords texCoords)
}
else
{
return g_GlobalConstants.GlobalSubdivisionLevel;
return g_GlobalConstants.MaxSubdivisionLevel;
}
}

int GetOmmDescOffset(ByteAddressBuffer ommIndexBuffer, uint primitiveIndex)
{
// TODO: support 16-bit indices.
if (g_GlobalConstants.IsOmmIndexFormat16bit)
{
const uint dwOffset = primitiveIndex.x >> 1u;
const uint shift = (primitiveIndex.x & 1u) << 4u; // 0 or 16
const uint val = ommIndexBuffer.Load(4 * dwOffset);
return (val >> shift) & 0xFFFF;
const uint shift = (primitiveIndex.x & 1u) << 4u; // 0 or 16
const uint raw = ommIndexBuffer.Load(4 * dwOffset);
const uint raw16 = (raw >> shift) & 0xFFFFu;

if (raw16 > 0xFFFB) // e.g special index
{
return (raw16 - 0xFFFF) - 1; // -1, -2, -3 or -4
}

return raw16;
}
else
{
Expand Down
9 changes: 4 additions & 5 deletions omm-sdk/shaders/omm_work_setup_cs.cs.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ void main(uint3 tid : SV_DispatchThreadID)
uint hashTableEntryIndex;
hashTable::Result result = FindOrInsertOMMEntry(texCoords, subdivisionLevel, hashTableEntryIndex);

uint vmDescOffset = 0;
int vmDescOffset = (int)SpecialIndex::FullyUnknownOpaque;

if (result == hashTable::Result::Null ||
result == hashTable::Result::Inserted ||
result == hashTable::Result::ReachedMaxAttemptCount)
Expand All @@ -66,9 +67,7 @@ void main(uint3 tid : SV_DispatchThreadID)
OMM_SUBRESOURCE_INTERLOCKEDADD(OmmArrayAllocatorCounterBuffer, 0, vmDataByteSize, vmArrayOffset);
}

const uint kMaxVmArrayBudget = 0xFFFFFFFF;

if ((vmArrayOffset + vmDataByteSize) < kMaxVmArrayBudget)
if ((vmArrayOffset + vmDataByteSize) <= g_GlobalConstants.MaxOutOmmArraySize)
{
// Allocate new VM-desc for the vmArrayOffset
{
Expand Down Expand Up @@ -147,7 +146,7 @@ void main(uint3 tid : SV_DispatchThreadID)
else // if (status == hashTable::Result::Found
{
// Store the hash-table offset and patch up the pointers later.
vmDescOffset = (uint)(-hashTableEntryIndex - 4);
vmDescOffset = (uint)(-hashTableEntryIndex - 5);
}

OMM_SUBRESOURCE_STORE(TempOmmIndexBuffer, 4 * primitiveIndex, vmDescOffset);
Expand Down
Loading

0 comments on commit ebb7ee9

Please sign in to comment.