diff --git a/doc/classes/Performance.xml b/doc/classes/Performance.xml index 6bb71932dd22..66078d2642d2 100644 --- a/doc/classes/Performance.xml +++ b/doc/classes/Performance.xml @@ -224,7 +224,22 @@ Number of active navigation obstacles in the [NavigationServer3D]. - + + Number of pipeline compilations that were triggered by the 2D canvas renderer. + + + Number of pipeline compilations that were triggered by loading meshes. These compilations will show up as longer loading times the first time a user runs the game and the pipeline is required. + + + Number of pipeline compilations that were triggered by building the surface cache before rendering the scene. These compilations will show up as a stutter when loading a scene the first time a user runs the game and the pipeline is required. + + + Number of pipeline compilations that were triggered while drawing the scene. These compilations will show up as stutters during gameplay the first time a user runs the game and the pipeline is required. + + + Number of pipeline compilations that were triggered to optimize the current scene. These compilations are done in the background and should not cause any stutters whatsoever. + + Represents the size of the [enum Monitor] enum. diff --git a/doc/classes/RenderingServer.xml b/doc/classes/RenderingServer.xml index a57f6adec8d0..9139a161b434 100644 --- a/doc/classes/RenderingServer.xml +++ b/doc/classes/RenderingServer.xml @@ -5687,6 +5687,39 @@ Video memory used (in bytes). When using the Forward+ or mobile rendering backends, this is always greater than the sum of [constant RENDERING_INFO_TEXTURE_MEM_USED] and [constant RENDERING_INFO_BUFFER_MEM_USED], since there is miscellaneous data not accounted for by those two metrics. When using the GL Compatibility backend, this is equal to the sum of [constant RENDERING_INFO_TEXTURE_MEM_USED] and [constant RENDERING_INFO_BUFFER_MEM_USED]. + + Number of pipeline compilations that were triggered by the 2D canvas renderer. 
+ + + Number of pipeline compilations that were triggered by loading meshes. These compilations will show up as longer loading times the first time a user runs the game and the pipeline is required. + + + Number of pipeline compilations that were triggered by building the surface cache before rendering the scene. These compilations will show up as a stutter when loading a scene the first time a user runs the game and the pipeline is required. + + + Number of pipeline compilations that were triggered while drawing the scene. These compilations will show up as stutters during gameplay the first time a user runs the game and the pipeline is required. + + + Number of pipeline compilations that were triggered to optimize the current scene. These compilations are done in the background and should not cause any stutters whatsoever. + + + Pipeline compilation that was triggered by the 2D canvas renderer. + + + Pipeline compilation that was triggered by loading a mesh. + + + Pipeline compilation that was triggered by building the surface cache before rendering the scene. + + + Pipeline compilation that was triggered while drawing the scene. + + + Pipeline compilation that was triggered to optimize the current scene. + + + Represents the size of the [enum PipelineSource] enum. 
+ diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index 52883de45e67..8271d4b7e38d 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -481,44 +481,6 @@ void RenderingDeviceDriverD3D12::_debug_message_func(D3D12_MESSAGE_CATEGORY p_ca } } -/****************/ -/**** MEMORY ****/ -/****************/ - -static const uint32_t SMALL_ALLOCATION_MAX_SIZE = 4096; - -#ifdef USE_SMALL_ALLOCS_POOL -D3D12MA::Pool *RenderingDeviceDriverD3D12::_find_or_create_small_allocs_pool(D3D12_HEAP_TYPE p_heap_type, D3D12_HEAP_FLAGS p_heap_flags) { - D3D12_HEAP_FLAGS effective_heap_flags = p_heap_flags; - if (allocator->GetD3D12Options().ResourceHeapTier != D3D12_RESOURCE_HEAP_TIER_1) { - // Heap tier 2 allows mixing resource types liberally. - effective_heap_flags &= ~(D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES | D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES); - } - - AllocPoolKey pool_key; - pool_key.heap_type = p_heap_type; - pool_key.heap_flags = effective_heap_flags; - if (small_allocs_pools.has(pool_key.key)) { - return small_allocs_pools[pool_key.key].Get(); - } - -#ifdef DEV_ENABLED - print_verbose("Creating D3D12MA small objects pool for heap type " + itos(p_heap_type) + " and heap flags " + itos(p_heap_flags)); -#endif - - D3D12MA::POOL_DESC poolDesc = {}; - poolDesc.HeapProperties.Type = p_heap_type; - poolDesc.HeapFlags = effective_heap_flags; - - ComPtr pool; - HRESULT res = allocator->CreatePool(&poolDesc, pool.GetAddressOf()); - small_allocs_pools[pool_key.key] = pool; // Don't try to create it again if failed the first time. 
- ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), nullptr, "CreatePool failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - - return pool.Get(); -} -#endif - /******************/ /**** RESOURCE ****/ /******************/ @@ -533,13 +495,9 @@ static const D3D12_RESOURCE_DIMENSION RD_TEXTURE_TYPE_TO_D3D12_RESOURCE_DIMENSIO D3D12_RESOURCE_DIMENSION_TEXTURE2D, }; -void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_resource, uint32_t p_subresource, uint32_t p_num_planes, D3D12_RESOURCE_STATES p_new_state) { +void RenderingDeviceDriverD3D12::_resource_transition_batch(CommandBufferInfo *p_command_buffer, ResourceInfo *p_resource, uint32_t p_subresource, uint32_t p_num_planes, D3D12_RESOURCE_STATES p_new_state) { DEV_ASSERT(p_subresource != UINT32_MAX); // We don't support an "all-resources" command here. -#ifdef DEBUG_COUNT_BARRIERS - uint64_t start = OS::get_singleton()->get_ticks_usec(); -#endif - ResourceInfo::States *res_states = p_resource->states_ptr; D3D12_RESOURCE_STATES *curr_state = &res_states->subresource_states[p_subresource]; @@ -549,21 +507,21 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso bool redundant_transition = any_state_is_common ? 
*curr_state == p_new_state : ((*curr_state) & p_new_state) == p_new_state; if (redundant_transition) { bool just_written = *curr_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - bool needs_uav_barrier = just_written && res_states->last_batch_with_uav_barrier != res_barriers_batch; + bool needs_uav_barrier = just_written && res_states->last_batch_with_uav_barrier != p_command_buffer->res_barriers_batch; if (needs_uav_barrier) { - if (res_barriers.size() < res_barriers_count + 1) { - res_barriers.resize(res_barriers_count + 1); + if (p_command_buffer->res_barriers.size() < p_command_buffer->res_barriers_count + 1) { + p_command_buffer->res_barriers.resize(p_command_buffer->res_barriers_count + 1); } - res_barriers[res_barriers_count] = CD3DX12_RESOURCE_BARRIER::UAV(p_resource->resource); - res_barriers_count++; - res_states->last_batch_with_uav_barrier = res_barriers_batch; + p_command_buffer->res_barriers[p_command_buffer->res_barriers_count] = CD3DX12_RESOURCE_BARRIER::UAV(p_resource->resource); + p_command_buffer->res_barriers_count++; + res_states->last_batch_with_uav_barrier = p_command_buffer->res_barriers_batch; } } else { uint64_t subres_mask_piece = ((uint64_t)1 << (p_subresource & 0b111111)); uint8_t subres_qword = p_subresource >> 6; - if (res_barriers_requests.has(res_states)) { - BarrierRequest &br = res_barriers_requests.get(res_states); + if (p_command_buffer->res_barriers_requests.has(res_states)) { + BarrierRequest &br = p_command_buffer->res_barriers_requests.get(res_states); DEV_ASSERT(br.dx_resource == p_resource->resource); DEV_ASSERT(br.subres_mask_qwords == STEPIFY(res_states->subresource_states.size(), 64) / 64); DEV_ASSERT(br.planes == p_num_planes); @@ -681,7 +639,7 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso } } } else { - BarrierRequest &br = res_barriers_requests[res_states]; + BarrierRequest &br = p_command_buffer->res_barriers_requests[res_states]; br.dx_resource = p_resource->resource; 
br.subres_mask_qwords = STEPIFY(p_resource->states_ptr->subresource_states.size(), 64) / 64; CRASH_COND(p_resource->states_ptr->subresource_states.size() > BarrierRequest::MAX_SUBRESOURCES); @@ -697,18 +655,10 @@ void RenderingDeviceDriverD3D12::_resource_transition_batch(ResourceInfo *p_reso br.groups_count = 1; } } - -#ifdef DEBUG_COUNT_BARRIERS - frame_barriers_cpu_time += OS::get_singleton()->get_ticks_usec() - start; -#endif } -void RenderingDeviceDriverD3D12::_resource_transitions_flush(ID3D12GraphicsCommandList *p_cmd_list) { -#ifdef DEBUG_COUNT_BARRIERS - uint64_t start = OS::get_singleton()->get_ticks_usec(); -#endif - - for (const KeyValue &E : res_barriers_requests) { +void RenderingDeviceDriverD3D12::_resource_transitions_flush(CommandBufferInfo *p_command_buffer) { + for (const KeyValue &E : p_command_buffer->res_barriers_requests) { ResourceInfo::States *res_states = E.key; const BarrierRequest &br = E.value; @@ -760,22 +710,22 @@ void RenderingDeviceDriverD3D12::_resource_transitions_flush(ID3D12GraphicsComma // Hurray!, we can do a single barrier (plus maybe a UAV one, too). bool just_written = res_states->subresource_states[0] == D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - bool needs_uav_barrier = just_written && res_states->last_batch_with_uav_barrier != res_barriers_batch; + bool needs_uav_barrier = just_written && res_states->last_batch_with_uav_barrier != p_command_buffer->res_barriers_batch; uint32_t needed_barriers = (needs_uav_barrier ? 
1 : 0) + 1; - if (res_barriers.size() < res_barriers_count + needed_barriers) { - res_barriers.resize(res_barriers_count + needed_barriers); + if (p_command_buffer->res_barriers.size() < p_command_buffer->res_barriers_count + needed_barriers) { + p_command_buffer->res_barriers.resize(p_command_buffer->res_barriers_count + needed_barriers); } if (needs_uav_barrier) { - res_barriers[res_barriers_count] = CD3DX12_RESOURCE_BARRIER::UAV(br.dx_resource); - res_barriers_count++; - res_states->last_batch_with_uav_barrier = res_barriers_batch; + p_command_buffer->res_barriers[p_command_buffer->res_barriers_count] = CD3DX12_RESOURCE_BARRIER::UAV(br.dx_resource); + p_command_buffer->res_barriers_count++; + res_states->last_batch_with_uav_barrier = p_command_buffer->res_barriers_batch; } if (res_states->subresource_states[0] != br.groups[0].states) { - res_barriers[res_barriers_count] = CD3DX12_RESOURCE_BARRIER::Transition(br.dx_resource, res_states->subresource_states[0], br.groups[0].states, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES); - res_barriers_count++; + p_command_buffer->res_barriers[p_command_buffer->res_barriers_count] = CD3DX12_RESOURCE_BARRIER::Transition(br.dx_resource, res_states->subresource_states[0], br.groups[0].states, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES); + p_command_buffer->res_barriers_count++; } for (uint32_t i = 0; i < num_subresources; i++) { @@ -811,23 +761,23 @@ void RenderingDeviceDriverD3D12::_resource_transitions_flush(ID3D12GraphicsComma D3D12_RESOURCE_STATES *curr_state = &res_states->subresource_states[subresource]; bool just_written = *curr_state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - bool needs_uav_barrier = just_written && res_states->last_batch_with_uav_barrier != res_barriers_batch; + bool needs_uav_barrier = just_written && res_states->last_batch_with_uav_barrier != p_command_buffer->res_barriers_batch; uint32_t needed_barriers = (needs_uav_barrier ? 
1 : 0) + br.planes; - if (res_barriers.size() < res_barriers_count + needed_barriers) { - res_barriers.resize(res_barriers_count + needed_barriers); + if (p_command_buffer->res_barriers.size() < p_command_buffer->res_barriers_count + needed_barriers) { + p_command_buffer->res_barriers.resize(p_command_buffer->res_barriers_count + needed_barriers); } if (needs_uav_barrier) { - res_barriers[res_barriers_count] = CD3DX12_RESOURCE_BARRIER::UAV(br.dx_resource); - res_barriers_count++; - res_states->last_batch_with_uav_barrier = res_barriers_batch; + p_command_buffer->res_barriers[p_command_buffer->res_barriers_count] = CD3DX12_RESOURCE_BARRIER::UAV(br.dx_resource); + p_command_buffer->res_barriers_count++; + res_states->last_batch_with_uav_barrier = p_command_buffer->res_barriers_batch; } if (*curr_state != g.states) { for (uint8_t k = 0; k < br.planes; k++) { - res_barriers[res_barriers_count] = CD3DX12_RESOURCE_BARRIER::Transition(br.dx_resource, *curr_state, g.states, subresource + k * num_subresources); - res_barriers_count++; + p_command_buffer->res_barriers[p_command_buffer->res_barriers_count] = CD3DX12_RESOURCE_BARRIER::Transition(br.dx_resource, *curr_state, g.states, subresource + k * num_subresources); + p_command_buffer->res_barriers_count++; } } @@ -839,19 +789,13 @@ void RenderingDeviceDriverD3D12::_resource_transitions_flush(ID3D12GraphicsComma } } - if (res_barriers_count) { - p_cmd_list->ResourceBarrier(res_barriers_count, res_barriers.ptr()); - res_barriers_requests.clear(); + if (p_command_buffer->res_barriers_count) { + p_command_buffer->cmd_list->ResourceBarrier(p_command_buffer->res_barriers_count, p_command_buffer->res_barriers.ptr()); + p_command_buffer->res_barriers_requests.clear(); } -#ifdef DEBUG_COUNT_BARRIERS - frame_barriers_count += res_barriers_count; - frame_barriers_batches_count++; - frame_barriers_cpu_time += OS::get_singleton()->get_ticks_usec() - start; -#endif - - res_barriers_count = 0; - res_barriers_batch++; + 
p_command_buffer->res_barriers_count = 0; + p_command_buffer->res_barriers_batch++; } /*****************/ @@ -889,11 +833,7 @@ RDD::BufferID RenderingDeviceDriverD3D12::buffer_create(uint64_t p_size, BitFiel } } break; case MEMORY_ALLOCATION_TYPE_GPU: { -#ifdef USE_SMALL_ALLOCS_POOL - if (p_size <= SMALL_ALLOCATION_MAX_SIZE) { - allocation_desc.CustomPool = _find_or_create_small_allocs_pool(allocation_desc.HeapType, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS); - } -#endif + // Use default parameters. } break; } @@ -1002,6 +942,7 @@ static const D3D12_UAV_DIMENSION RD_TEXTURE_TYPE_TO_D3D12_VIEW_DIMENSION_FOR_UAV uint32_t RenderingDeviceDriverD3D12::_find_max_common_supported_sample_count(VectorView p_formats) { uint32_t common = UINT32_MAX; + MutexLock lock(format_sample_counts_mask_cache_mutex); for (uint32_t i = 0; i < p_formats.size(); i++) { if (format_sample_counts_mask_cache.has(p_formats[i])) { common &= format_sample_counts_mask_cache[p_formats[i]]; @@ -1292,14 +1233,6 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p allocation_desc.ExtraHeapFlags |= D3D12_HEAP_FLAG_ALLOW_SHADER_ATOMICS; } -#ifdef USE_SMALL_ALLOCS_POOL - uint32_t width = 0, height = 0; - uint32_t image_size = get_image_format_required_size(p_format.format, p_format.width, p_format.height, p_format.depth, p_format.mipmaps, &width, &height); - if (image_size <= SMALL_ALLOCATION_MAX_SIZE) { - allocation_desc.CustomPool = _find_or_create_small_allocs_pool(allocation_desc.HeapType, allocation_desc.ExtraHeapFlags); - } -#endif - D3D12_RESOURCE_STATES initial_state = {}; ID3D12Resource *texture = nullptr; ComPtr main_texture; @@ -4132,6 +4065,7 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff } } + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; const UniformSetInfo *uniform_set_info = (const UniformSetInfo *)p_uniform_set.id; const ShaderInfo *shader_info_in = (const ShaderInfo *)p_shader.id; const 
ShaderInfo::UniformSet &shader_set = shader_info_in->sets[p_set_index]; @@ -4247,7 +4181,7 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff if (likely(wanted_state)) { if (sr.is_buffer) { - _resource_transition_batch(sr.resource, 0, 1, wanted_state); + _resource_transition_batch(cmd_buf_info, sr.resource, 0, 1, wanted_state); } else { TextureInfo *tex_info = (TextureInfo *)sr.resource; uint32_t planes = 1; @@ -4257,7 +4191,7 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff for (uint32_t i = 0; i < tex_info->layers; i++) { for (uint32_t j = 0; j < tex_info->mipmaps; j++) { uint32_t subresource = D3D12CalcSubresource(tex_info->base_mip + j, tex_info->base_layer + i, 0, tex_info->desc.MipLevels, tex_info->desc.ArraySize()); - _resource_transition_batch(tex_info, subresource, planes, wanted_state); + _resource_transition_batch(cmd_buf_info, tex_info, subresource, planes, wanted_state); } } } @@ -4266,8 +4200,7 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff } if (p_set_index == shader_info_in->sets.size() - 1) { - CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transitions_flush(cmd_buf_info); } } @@ -4520,7 +4453,7 @@ void RenderingDeviceDriverD3D12::_command_bind_uniform_set(CommandBufferID p_cmd void RenderingDeviceDriverD3D12::command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) { _command_check_descriptor_sets(p_cmd_buffer); - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; BufferInfo *buf_info = (BufferInfo *)p_buffer.id; if (frames[frame_idx].desc_heap_walkers.resources.is_at_eof()) { @@ -4545,8 +4478,8 @@ void RenderingDeviceDriverD3D12::command_clear_buffer(CommandBufferID p_cmd_buff } if 
(!barrier_capabilities.enhanced_barriers_supported) { - _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transition_batch(cmd_buf_info, buf_info, 0, 1, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + _resource_transitions_flush(cmd_buf_info); } D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {}; @@ -4583,14 +4516,14 @@ void RenderingDeviceDriverD3D12::command_clear_buffer(CommandBufferID p_cmd_buff } void RenderingDeviceDriverD3D12::command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_buf_locfer, VectorView p_regions) { - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; BufferInfo *src_buf_info = (BufferInfo *)p_src_buffer.id; BufferInfo *buf_loc_info = (BufferInfo *)p_buf_locfer.id; if (!barrier_capabilities.enhanced_barriers_supported) { - _resource_transition_batch(src_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); - _resource_transition_batch(buf_loc_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transition_batch(cmd_buf_info, src_buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); + _resource_transition_batch(cmd_buf_info, buf_loc_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); + _resource_transitions_flush(cmd_buf_info); } for (uint32_t i = 0; i < p_regions.size(); i++) { @@ -4599,7 +4532,7 @@ void RenderingDeviceDriverD3D12::command_copy_buffer(CommandBufferID p_cmd_buffe } void RenderingDeviceDriverD3D12::command_copy_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView p_regions) { - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; 
TextureInfo *src_tex_info = (TextureInfo *)p_src_texture.id; TextureInfo *dst_tex_info = (TextureInfo *)p_dst_texture.id; @@ -4610,12 +4543,12 @@ void RenderingDeviceDriverD3D12::command_copy_texture(CommandBufferID p_cmd_buff for (uint32_t j = 0; j < layer_count; j++) { UINT src_subresource = _compute_subresource_from_layers(src_tex_info, p_regions[i].src_subresources, j); UINT dst_subresource = _compute_subresource_from_layers(dst_tex_info, p_regions[i].dst_subresources, j); - _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); - _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); + _resource_transition_batch(cmd_buf_info, src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); + _resource_transition_batch(cmd_buf_info, dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); } } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transitions_flush(cmd_buf_info); } CD3DX12_BOX src_box; @@ -4638,23 +4571,23 @@ void RenderingDeviceDriverD3D12::command_copy_texture(CommandBufferID p_cmd_buff } void RenderingDeviceDriverD3D12::command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) { - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; TextureInfo *src_tex_info = (TextureInfo *)p_src_texture.id; TextureInfo *dst_tex_info = (TextureInfo *)p_dst_texture.id; UINT src_subresource = D3D12CalcSubresource(p_src_mipmap, p_src_layer, 0, src_tex_info->desc.MipLevels, src_tex_info->desc.ArraySize()); UINT dst_subresource = D3D12CalcSubresource(p_dst_mipmap, p_dst_layer, 0, dst_tex_info->desc.MipLevels, dst_tex_info->desc.ArraySize()); if 
(!barrier_capabilities.enhanced_barriers_supported) { - _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); - _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transition_batch(cmd_buf_info, src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + _resource_transition_batch(cmd_buf_info, dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST); + _resource_transitions_flush(cmd_buf_info); } cmd_buf_info->cmd_list->ResolveSubresource(dst_tex_info->resource, dst_subresource, src_tex_info->resource, src_subresource, RD_TO_D3D12_FORMAT[src_tex_info->format].general_format); } void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) { - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; TextureInfo *tex_info = (TextureInfo *)p_texture.id; if (tex_info->main_texture) { tex_info = tex_info->main_texture; @@ -4669,10 +4602,10 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c 0, tex_info->desc.MipLevels, tex_info->desc.ArraySize()); - _resource_transition_batch(tex_info, subresource, 1, p_new_state); + _resource_transition_batch(cmd_buf_info, tex_info, subresource, 1, p_new_state); } } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transitions_flush(cmd_buf_info); }; if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { @@ -4775,11 +4708,11 @@ void RenderingDeviceDriverD3D12::command_clear_color_texture(CommandBufferID p_c } void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, 
TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView p_regions) { - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; BufferInfo *buf_info = (BufferInfo *)p_src_buffer.id; TextureInfo *tex_info = (TextureInfo *)p_dst_texture.id; if (!barrier_capabilities.enhanced_barriers_supported) { - _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); + _resource_transition_batch(cmd_buf_info, buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); } uint32_t pixel_size = get_image_format_pixel_size(tex_info->format); @@ -4816,10 +4749,10 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID tex_info->desc.ArraySize()); CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource); - _resource_transition_batch(tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); + _resource_transition_batch(cmd_buf_info, tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transitions_flush(cmd_buf_info); } for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { @@ -4843,12 +4776,12 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID } void RenderingDeviceDriverD3D12::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_buf_locfer, VectorView p_regions) { - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; TextureInfo *tex_info = (TextureInfo *)p_src_texture.id; BufferInfo *buf_info = (BufferInfo *)p_buf_locfer.id; if (!barrier_capabilities.enhanced_barriers_supported) { - _resource_transition_batch(buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); + 
_resource_transition_batch(cmd_buf_info, buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_DEST); } uint32_t block_w = 0, block_h = 0; @@ -4864,10 +4797,10 @@ void RenderingDeviceDriverD3D12::command_copy_texture_to_buffer(CommandBufferID tex_info->desc.MipLevels, tex_info->desc.ArraySize()); - _resource_transition_batch(tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); + _resource_transition_batch(cmd_buf_info, tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transitions_flush(cmd_buf_info); } for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { @@ -4910,10 +4843,9 @@ void RenderingDeviceDriverD3D12::command_copy_texture_to_buffer(CommandBufferID /******************/ void RenderingDeviceDriverD3D12::pipeline_free(PipelineID p_pipeline) { - ID3D12PipelineState *pso = (ID3D12PipelineState *)p_pipeline.id; - pso->Release(); - pipelines_shaders.erase(pso); - render_psos_extra_info.erase(pso); + PipelineInfo *pipeline_info = (PipelineInfo *)(p_pipeline.id); + pipeline_info->pso->Release(); + memdelete(pipeline_info); } // ----- BINDING ----- @@ -5013,7 +4945,8 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd 0, p_texture_info->desc.MipLevels, p_texture_info->desc.ArraySize()); - _resource_transition_batch(p_texture_info, subresource, planes, p_states); + + _resource_transition_batch(cmd_buf_info, p_texture_info, subresource, planes, p_states); } } }; @@ -5035,7 +4968,7 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd _transition_subresources(tex_info, D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE); } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transitions_flush(cmd_buf_info); } cmd_buf_info->render_pass_state.region_rect = CD3DX12_RECT( @@ -5109,7 +5042,7 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd } void 
RenderingDeviceDriverD3D12::_end_render_pass(CommandBufferID p_cmd_buffer) { - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; DEV_ASSERT(cmd_buf_info->render_pass_state.current_subpass != UINT32_MAX); @@ -5122,7 +5055,7 @@ void RenderingDeviceDriverD3D12::_end_render_pass(CommandBufferID p_cmd_buffer) for (uint32_t i = 0; i < fb_info->attachments.size(); i++) { TextureInfo *src_tex_info = (TextureInfo *)(fb_info->attachments[i].id); uint32_t src_subresource = D3D12CalcSubresource(src_tex_info->base_mip, src_tex_info->base_layer, 0, src_tex_info->desc.MipLevels, src_tex_info->desc.ArraySize()); - _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_PRESENT); + _resource_transition_batch(cmd_buf_info, src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_PRESENT); } } @@ -5146,11 +5079,11 @@ void RenderingDeviceDriverD3D12::_end_render_pass(CommandBufferID p_cmd_buffer) TextureInfo *src_tex_info = (TextureInfo *)fb_info->attachments[color_index].id; uint32_t src_subresource = D3D12CalcSubresource(src_tex_info->base_mip, src_tex_info->base_layer, 0, src_tex_info->desc.MipLevels, src_tex_info->desc.ArraySize()); - _resource_transition_batch(src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + _resource_transition_batch(cmd_buf_info, src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); TextureInfo *dst_tex_info = (TextureInfo *)fb_info->attachments[resolve_index].id; uint32_t dst_subresource = D3D12CalcSubresource(dst_tex_info->base_mip, dst_tex_info->base_layer, 0, dst_tex_info->desc.MipLevels, dst_tex_info->desc.ArraySize()); - _resource_transition_batch(dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST); + _resource_transition_batch(cmd_buf_info, dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST); resolves[num_resolves].src_res = src_tex_info->resource; 
resolves[num_resolves].src_subres = src_subresource; @@ -5160,7 +5093,7 @@ void RenderingDeviceDriverD3D12::_end_render_pass(CommandBufferID p_cmd_buffer) num_resolves++; } - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transitions_flush(cmd_buf_info); for (uint32_t i = 0; i < num_resolves; i++) { cmd_buf_info->cmd_list->ResolveSubresource(resolves[i].dst_res, resolves[i].dst_subres, resolves[i].src_res, resolves[i].src_subres, resolves[i].format); @@ -5348,36 +5281,36 @@ void RenderingDeviceDriverD3D12::command_render_clear_attachments(CommandBufferI void RenderingDeviceDriverD3D12::command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) { CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; - ID3D12PipelineState *pso = (ID3D12PipelineState *)p_pipeline.id; + const PipelineInfo *pipeline_info = (const PipelineInfo *)p_pipeline.id; - if (cmd_buf_info->graphics_pso == pso) { + if (cmd_buf_info->graphics_pso == pipeline_info->pso) { return; } - const ShaderInfo *shader_info_in = pipelines_shaders[pso]; - const RenderPipelineExtraInfo &pso_extra_info = render_psos_extra_info[pso]; + const ShaderInfo *shader_info_in = pipeline_info->shader_info; + const RenderPipelineInfo &render_info = pipeline_info->render_info; - cmd_buf_info->cmd_list->SetPipelineState(pso); + cmd_buf_info->cmd_list->SetPipelineState(pipeline_info->pso); if (cmd_buf_info->graphics_root_signature_crc != shader_info_in->root_signature_crc) { cmd_buf_info->cmd_list->SetGraphicsRootSignature(shader_info_in->root_signature.Get()); cmd_buf_info->graphics_root_signature_crc = shader_info_in->root_signature_crc; } - cmd_buf_info->cmd_list->IASetPrimitiveTopology(pso_extra_info.dyn_params.primitive_topology); - cmd_buf_info->cmd_list->OMSetBlendFactor(pso_extra_info.dyn_params.blend_constant.components); - cmd_buf_info->cmd_list->OMSetStencilRef(pso_extra_info.dyn_params.stencil_reference); + 
cmd_buf_info->cmd_list->IASetPrimitiveTopology(render_info.dyn_params.primitive_topology); + cmd_buf_info->cmd_list->OMSetBlendFactor(render_info.dyn_params.blend_constant.components); + cmd_buf_info->cmd_list->OMSetStencilRef(render_info.dyn_params.stencil_reference); if (misc_features_support.depth_bounds_supported) { ComPtr command_list_1; cmd_buf_info->cmd_list->QueryInterface(command_list_1.GetAddressOf()); if (command_list_1) { - command_list_1->OMSetDepthBounds(pso_extra_info.dyn_params.depth_bounds_min, pso_extra_info.dyn_params.depth_bounds_max); + command_list_1->OMSetDepthBounds(render_info.dyn_params.depth_bounds_min, render_info.dyn_params.depth_bounds_max); } } - cmd_buf_info->render_pass_state.vf_info = pso_extra_info.vf_info; + cmd_buf_info->render_pass_state.vf_info = render_info.vf_info; - cmd_buf_info->graphics_pso = pso; + cmd_buf_info->graphics_pso = pipeline_info->pso; cmd_buf_info->compute_pso = nullptr; } @@ -5402,8 +5335,8 @@ void RenderingDeviceDriverD3D12::command_render_draw_indexed_indirect(CommandBuf _bind_vertex_buffers(cmd_buf_info); BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id; if (!barrier_capabilities.enhanced_barriers_supported) { - _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transition_batch(cmd_buf_info, indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transitions_flush(cmd_buf_info); } cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw_indexed.Get(), p_draw_count, indirect_buf_info->resource, p_offset, nullptr, 0); @@ -5415,9 +5348,9 @@ void RenderingDeviceDriverD3D12::command_render_draw_indexed_indirect_count(Comm BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id; BufferInfo *count_buf_info = (BufferInfo *)p_count_buffer.id; if (!barrier_capabilities.enhanced_barriers_supported) { - 
_resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transition_batch(cmd_buf_info, indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transition_batch(cmd_buf_info, count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transitions_flush(cmd_buf_info); } cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw_indexed.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset); @@ -5428,8 +5361,8 @@ void RenderingDeviceDriverD3D12::command_render_draw_indirect(CommandBufferID p_ _bind_vertex_buffers(cmd_buf_info); BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id; if (!barrier_capabilities.enhanced_barriers_supported) { - _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transition_batch(cmd_buf_info, indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transitions_flush(cmd_buf_info); } cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_draw_count, indirect_buf_info->resource, p_offset, nullptr, 0); @@ -5441,9 +5374,9 @@ void RenderingDeviceDriverD3D12::command_render_draw_indirect_count(CommandBuffe BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id; BufferInfo *count_buf_info = (BufferInfo *)p_count_buffer.id; if (!barrier_capabilities.enhanced_barriers_supported) { - _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transition_batch(cmd_buf_info, 
indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transition_batch(cmd_buf_info, count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transitions_flush(cmd_buf_info); } cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset); @@ -5465,19 +5398,19 @@ void RenderingDeviceDriverD3D12::command_render_bind_vertex_buffers(CommandBuffe cmd_buf_info->render_pass_state.vertex_buffer_views[i].BufferLocation = buffer_info->resource->GetGPUVirtualAddress() + p_offsets[i]; cmd_buf_info->render_pass_state.vertex_buffer_views[i].SizeInBytes = buffer_info->size - p_offsets[i]; if (!barrier_capabilities.enhanced_barriers_supported) { - _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER); + _resource_transition_batch(cmd_buf_info, buffer_info, 0, 1, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER); } } if (!barrier_capabilities.enhanced_barriers_supported) { - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transitions_flush(cmd_buf_info); } cmd_buf_info->render_pass_state.vertex_buffer_count = p_binding_count; } void RenderingDeviceDriverD3D12::command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) { - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; BufferInfo *buffer_info = (BufferInfo *)p_buffer.id; D3D12_INDEX_BUFFER_VIEW d3d12_ib_view = {}; @@ -5486,8 +5419,8 @@ void RenderingDeviceDriverD3D12::command_render_bind_index_buffer(CommandBufferI d3d12_ib_view.Format = p_format == INDEX_BUFFER_FORMAT_UINT16 ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; if (!barrier_capabilities.enhanced_barriers_supported) { - _resource_transition_batch(buffer_info, 0, 1, D3D12_RESOURCE_STATE_INDEX_BUFFER); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transition_batch(cmd_buf_info, buffer_info, 0, 1, D3D12_RESOURCE_STATE_INDEX_BUFFER); + _resource_transitions_flush(cmd_buf_info); } cmd_buf_info->cmd_list->IASetIndexBuffer(&d3d12_ib_view); @@ -5628,9 +5561,9 @@ RDD::PipelineID RenderingDeviceDriverD3D12::render_pipeline_create( const ShaderInfo *shader_info_in = (const ShaderInfo *)p_shader.id; CD3DX12_PIPELINE_STATE_STREAM pipeline_desc = {}; - RenderPipelineExtraInfo pso_extra_info; const RenderPassInfo *pass_info = (const RenderPassInfo *)p_render_pass.id; + RenderPipelineInfo render_info; // Attachments. LocalVector color_attachments; @@ -5664,7 +5597,7 @@ RDD::PipelineID RenderingDeviceDriverD3D12::render_pipeline_create( const VertexFormatInfo *vf_info = (const VertexFormatInfo *)p_vertex_format.id; (&pipeline_desc.InputLayout)->pInputElementDescs = vf_info->input_elem_descs.ptr(); (&pipeline_desc.InputLayout)->NumElements = vf_info->input_elem_descs.size(); - pso_extra_info.vf_info = vf_info; + render_info.vf_info = vf_info; } // Input assembly & tessellation. @@ -5673,9 +5606,9 @@ RDD::PipelineID RenderingDeviceDriverD3D12::render_pipeline_create( if (p_render_primitive == RENDER_PRIMITIVE_TESSELATION_PATCH) { // Is there any way to get the true point count limit? 
ERR_FAIL_COND_V(p_rasterization_state.patch_control_points < 1 || p_rasterization_state.patch_control_points > 32, PipelineID()); - pso_extra_info.dyn_params.primitive_topology = (D3D12_PRIMITIVE_TOPOLOGY)((int)D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + p_rasterization_state.patch_control_points); + render_info.dyn_params.primitive_topology = (D3D12_PRIMITIVE_TOPOLOGY)((int)D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + p_rasterization_state.patch_control_points); } else { - pso_extra_info.dyn_params.primitive_topology = RD_PRIMITIVE_TO_D3D12_TOPOLOGY[p_render_primitive]; + render_info.dyn_params.primitive_topology = RD_PRIMITIVE_TO_D3D12_TOPOLOGY[p_render_primitive]; } if (p_render_primitive == RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX) { // TODO: This is right for 16-bit indices; for 32-bit there's a different enum value to set, but we don't know at this point. @@ -5763,15 +5696,15 @@ RDD::PipelineID RenderingDeviceDriverD3D12::render_pipeline_create( (&pipeline_desc.DepthStencilState)->BackFace.StencilFunc = RD_TO_D3D12_COMPARE_OP[p_depth_stencil_state.back_op.compare]; if (misc_features_support.depth_bounds_supported) { - pso_extra_info.dyn_params.depth_bounds_min = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_min : 0.0f; - pso_extra_info.dyn_params.depth_bounds_max = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_max : 1.0f; + render_info.dyn_params.depth_bounds_min = p_depth_stencil_state.enable_depth_range ? p_depth_stencil_state.depth_range_min : 0.0f; + render_info.dyn_params.depth_bounds_max = p_depth_stencil_state.enable_depth_range ? 
p_depth_stencil_state.depth_range_max : 1.0f; } else { if (p_depth_stencil_state.enable_depth_range) { WARN_PRINT_ONCE("Depth bounds test is not supported by the GPU driver."); } } - pso_extra_info.dyn_params.stencil_reference = p_depth_stencil_state.front_op.reference; + render_info.dyn_params.stencil_reference = p_depth_stencil_state.front_op.reference; } // Blend states. @@ -5818,7 +5751,7 @@ RDD::PipelineID RenderingDeviceDriverD3D12::render_pipeline_create( (&pipeline_desc.BlendState)->IndependentBlendEnable = !all_attachments_same_blend; } - pso_extra_info.dyn_params.blend_constant = p_blend_state.blend_constant; + render_info.dyn_params.blend_constant = p_blend_state.blend_constant; // Stages bytecodes + specialization constants. @@ -5852,12 +5785,12 @@ RDD::PipelineID RenderingDeviceDriverD3D12::render_pipeline_create( } ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), PipelineID(), "Create(Graphics)PipelineState failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - // Bookkeep ancillary info. 
+ PipelineInfo *pipeline_info = memnew(PipelineInfo); + pipeline_info->pso = pso; + pipeline_info->shader_info = shader_info_in; + pipeline_info->render_info = render_info; - pipelines_shaders[pso] = shader_info_in; - render_psos_extra_info[pso] = pso_extra_info; - - return PipelineID(pso); + return PipelineID(pipeline_info); } /*****************/ @@ -5868,20 +5801,20 @@ RDD::PipelineID RenderingDeviceDriverD3D12::render_pipeline_create( void RenderingDeviceDriverD3D12::command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) { CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; - ID3D12PipelineState *pso = (ID3D12PipelineState *)p_pipeline.id; - const ShaderInfo *shader_info_in = pipelines_shaders[pso]; + const PipelineInfo *pipeline_info = (const PipelineInfo *)p_pipeline.id; - if (cmd_buf_info->compute_pso == pso) { + if (cmd_buf_info->compute_pso == pipeline_info->pso) { return; } - cmd_buf_info->cmd_list->SetPipelineState(pso); + const ShaderInfo *shader_info_in = pipeline_info->shader_info; + cmd_buf_info->cmd_list->SetPipelineState(pipeline_info->pso); if (cmd_buf_info->compute_root_signature_crc != shader_info_in->root_signature_crc) { cmd_buf_info->cmd_list->SetComputeRootSignature(shader_info_in->root_signature.Get()); cmd_buf_info->compute_root_signature_crc = shader_info_in->root_signature_crc; } - cmd_buf_info->compute_pso = pso; + cmd_buf_info->compute_pso = pipeline_info->pso; cmd_buf_info->graphics_pso = nullptr; } @@ -5890,20 +5823,20 @@ void RenderingDeviceDriverD3D12::command_bind_compute_uniform_set(CommandBufferI } void RenderingDeviceDriverD3D12::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; if (!barrier_capabilities.enhanced_barriers_supported) { - 
_resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transitions_flush(cmd_buf_info); } cmd_buf_info->cmd_list->Dispatch(p_x_groups, p_y_groups, p_z_groups); } void RenderingDeviceDriverD3D12::command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) { - const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)p_cmd_buffer.id; + CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id; if (!barrier_capabilities.enhanced_barriers_supported) { - _resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); - _resource_transitions_flush(cmd_buf_info->cmd_list.Get()); + _resource_transition_batch(cmd_buf_info, indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + _resource_transitions_flush(cmd_buf_info); } cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.dispatch.Get(), 1, indirect_buf_info->resource, p_offset, nullptr, 0); @@ -5944,11 +5877,11 @@ RDD::PipelineID RenderingDeviceDriverD3D12::compute_pipeline_create(ShaderID p_s } ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), PipelineID(), "Create(Compute)PipelineState failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - // Bookkeep ancillary info. 
- - pipelines_shaders[pso] = shader_info_in; + PipelineInfo *pipeline_info = memnew(PipelineInfo); + pipeline_info->pso = pso; + pipeline_info->shader_info = shader_info_in; - return PipelineID(pso); + return PipelineID(pipeline_info); } /*****************/ @@ -6111,8 +6044,8 @@ void RenderingDeviceDriverD3D12::set_object_name(ObjectType p_type, ID p_driver_ } } break; case OBJECT_TYPE_PIPELINE: { - ID3D12PipelineState *pso = (ID3D12PipelineState *)p_driver_id.id; - _set_object_name(pso, p_name); + const PipelineInfo *pipeline_info = (const PipelineInfo *)p_driver_id.id; + _set_object_name(pipeline_info->pso, p_name); } break; default: { DEV_ASSERT(false); diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index d8381279ec83..b449a9087665 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -80,7 +80,6 @@ using Microsoft::WRL::ComPtr; #define D3D12_BITCODE_OFFSETS_NUM_STAGES 3 #ifdef DEV_ENABLED -//#define DEBUG_COUNT_BARRIERS #define CUSTOM_INFO_QUEUE_ENABLED 0 #endif @@ -223,20 +222,6 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { ComPtr allocator; -#define USE_SMALL_ALLOCS_POOL // Disabled by now; seems not to be beneficial as it is in Vulkan. 
-#ifdef USE_SMALL_ALLOCS_POOL - union AllocPoolKey { - struct { - D3D12_HEAP_TYPE heap_type; - D3D12_HEAP_FLAGS heap_flags; - }; - uint64_t key = 0; - }; - HashMap> small_allocs_pools; - - D3D12MA::Pool *_find_or_create_small_allocs_pool(D3D12_HEAP_TYPE p_heap_type, D3D12_HEAP_FLAGS p_heap_flags); -#endif - /******************/ /**** RESOURCE ****/ /******************/ @@ -274,20 +259,11 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { uint8_t groups_count = 0; static const D3D12_RESOURCE_STATES DELETED_GROUP = D3D12_RESOURCE_STATES(0xFFFFFFFFU); }; - PagedAllocator> res_barriers_requests_allocator; - HashMap, decltype(res_barriers_requests_allocator)> res_barriers_requests; - - LocalVector res_barriers; - uint32_t res_barriers_count = 0; - uint32_t res_barriers_batch = 0; -#ifdef DEBUG_COUNT_BARRIERS - int frame_barriers_count = 0; - int frame_barriers_batches_count = 0; - uint64_t frame_barriers_cpu_time = 0; -#endif - void _resource_transition_batch(ResourceInfo *p_resource, uint32_t p_subresource, uint32_t p_num_planes, D3D12_RESOURCE_STATES p_new_state); - void _resource_transitions_flush(ID3D12GraphicsCommandList *p_cmd_list); + struct CommandBufferInfo; + + void _resource_transition_batch(CommandBufferInfo *p_command_buffer, ResourceInfo *p_resource, uint32_t p_subresource, uint32_t p_num_planes, D3D12_RESOURCE_STATES p_new_state); + void _resource_transitions_flush(CommandBufferInfo *p_command_buffer); /*****************/ /**** BUFFERS ****/ @@ -334,6 +310,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { SelfList::List textures_pending_clear; HashMap format_sample_counts_mask_cache; + Mutex format_sample_counts_mask_cache_mutex; uint32_t _find_max_common_supported_sample_count(VectorView p_formats); UINT _compute_component_mapping(const TextureView &p_view); @@ -341,7 +318,6 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { UINT _compute_plane_slice(DataFormat p_format, TextureAspect p_aspect); 
UINT _compute_subresource_from_layers(TextureInfo *p_texture, const TextureSubresourceLayers &p_layers, uint32_t p_layer_offset); - struct CommandBufferInfo; void _discard_texture_subresources(const TextureInfo *p_tex_info, const CommandBufferInfo *p_cmd_buf_info); protected: @@ -492,6 +468,11 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { RenderPassState render_pass_state; bool descriptor_heaps_set = false; + + HashMap res_barriers_requests; + LocalVector res_barriers; + uint32_t res_barriers_count = 0; + uint32_t res_barriers_batch = 0; }; public: @@ -797,10 +778,25 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { /**** PIPELINE ****/ /******************/ - virtual void pipeline_free(PipelineID p_pipeline) override final; + struct RenderPipelineInfo { + const VertexFormatInfo *vf_info = nullptr; -private: - HashMap pipelines_shaders; + struct { + D3D12_PRIMITIVE_TOPOLOGY primitive_topology = {}; + Color blend_constant; + float depth_bounds_min = 0.0f; + float depth_bounds_max = 0.0f; + uint32_t stencil_reference = 0; + } dyn_params; + }; + + struct PipelineInfo { + ID3D12PipelineState *pso = nullptr; + const ShaderInfo *shader_info = nullptr; + RenderPipelineInfo render_info; + }; + + virtual void pipeline_free(PipelineID p_pipeline) override final; public: // ----- BINDING ----- @@ -873,20 +869,6 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { // ----- PIPELINE ----- -private: - struct RenderPipelineExtraInfo { - struct { - D3D12_PRIMITIVE_TOPOLOGY primitive_topology = {}; - Color blend_constant; - float depth_bounds_min = 0.0f; - float depth_bounds_max = 0.0f; - uint32_t stencil_reference = 0; - } dyn_params; - - const VertexFormatInfo *vf_info = nullptr; - }; - HashMap render_psos_extra_info; - public: virtual PipelineID render_pipeline_create( ShaderID p_shader, @@ -1034,7 +1016,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { UniformSetInfo, RenderPassInfo, 
TimestampQueryPoolInfo>; - PagedAllocator resources_allocator; + PagedAllocator resources_allocator; /******************/ diff --git a/drivers/gles3/rasterizer_canvas_gles3.h b/drivers/gles3/rasterizer_canvas_gles3.h index 9c0d0abccb09..a82e2713e02b 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.h +++ b/drivers/gles3/rasterizer_canvas_gles3.h @@ -379,6 +379,8 @@ class RasterizerCanvasGLES3 : public RendererCanvasRender { } } + virtual uint32_t get_pipeline_compilations(RS::PipelineSource p_source) override { return 0; } + static RasterizerCanvasGLES3 *get_singleton(); RasterizerCanvasGLES3(); ~RasterizerCanvasGLES3(); diff --git a/drivers/gles3/rasterizer_scene_gles3.h b/drivers/gles3/rasterizer_scene_gles3.h index e4af8f99e921..06371b2b7f82 100644 --- a/drivers/gles3/rasterizer_scene_gles3.h +++ b/drivers/gles3/rasterizer_scene_gles3.h @@ -767,6 +767,11 @@ class RasterizerSceneGLES3 : public RendererSceneRender { uint32_t geometry_instance_get_pair_mask() override; + /* PIPELINES */ + + virtual void mesh_generate_pipelines(RID p_mesh, bool p_background_compilation) override {} + virtual uint32_t get_pipeline_compilations(RS::PipelineSource p_source) override { return 0; } + /* SDFGI UPDATE */ void sdfgi_update(const Ref &p_render_buffers, RID p_environment, const Vector3 &p_world_position) override {} diff --git a/drivers/gles3/storage/material_storage.cpp b/drivers/gles3/storage/material_storage.cpp index c29c741c2a16..0d713228c7cd 100644 --- a/drivers/gles3/storage/material_storage.cpp +++ b/drivers/gles3/storage/material_storage.cpp @@ -1509,6 +1509,10 @@ MaterialStorage::~MaterialStorage() { singleton = nullptr; } +bool MaterialStorage::can_create_resources_async() const { + return false; +} + /* GLOBAL SHADER UNIFORM API */ int32_t MaterialStorage::_global_shader_uniform_allocate(uint32_t p_elements) { diff --git a/drivers/gles3/storage/material_storage.h b/drivers/gles3/storage/material_storage.h index 392ebcc570d9..4432643fbda6 100644 --- 
a/drivers/gles3/storage/material_storage.h +++ b/drivers/gles3/storage/material_storage.h @@ -551,6 +551,8 @@ class MaterialStorage : public RendererMaterialStorage { ShaderCompiler compiler_sky; } shaders; + virtual bool can_create_resources_async() const override; + /* GLOBAL SHADER UNIFORM API */ void _update_global_shader_uniforms(); diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index bd395f41e228..d20f396281e1 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -4103,10 +4103,6 @@ bool RenderingDeviceDriverVulkan::pipeline_cache_create(const Vector &p cache_info.initialDataSize = pipelines_cache.buffer.size() - sizeof(PipelineCacheHeader); cache_info.pInitialData = pipelines_cache.buffer.ptr() + sizeof(PipelineCacheHeader); - if (pipeline_cache_control_support) { - cache_info.flags = VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT; - } - VkResult err = vkCreatePipelineCache(vk_device, &cache_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_CACHE), &pipelines_cache.vk_cache); if (err != VK_SUCCESS) { WARN_PRINT("vkCreatePipelinecache failed with error " + itos(err) + "."); diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 81f4256941e1..787de387c003 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -672,7 +672,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { VertexFormatInfo, ShaderInfo, UniformSetInfo>; - PagedAllocator resources_allocator; + PagedAllocator resources_allocator; /******************/ diff --git a/main/performance.cpp b/main/performance.cpp index 0547b3bff07c..c73fb62b760b 100644 --- a/main/performance.cpp +++ b/main/performance.cpp @@ -92,6 +92,11 @@ void Performance::_bind_methods() { BIND_ENUM_CONSTANT(NAVIGATION_EDGE_CONNECTION_COUNT); 
BIND_ENUM_CONSTANT(NAVIGATION_EDGE_FREE_COUNT); BIND_ENUM_CONSTANT(NAVIGATION_OBSTACLE_COUNT); + BIND_ENUM_CONSTANT(PIPELINE_COMPILATIONS_CANVAS); + BIND_ENUM_CONSTANT(PIPELINE_COMPILATIONS_MESH); + BIND_ENUM_CONSTANT(PIPELINE_COMPILATIONS_SURFACE); + BIND_ENUM_CONSTANT(PIPELINE_COMPILATIONS_DRAW); + BIND_ENUM_CONSTANT(PIPELINE_COMPILATIONS_SPECIALIZATION); BIND_ENUM_CONSTANT(MONITOR_MAX); } @@ -143,7 +148,11 @@ String Performance::get_monitor_name(Monitor p_monitor) const { PNAME("navigation/edges_connected"), PNAME("navigation/edges_free"), PNAME("navigation/obstacles"), - + PNAME("pipeline/compilations_canvas"), + PNAME("pipeline/compilations_mesh"), + PNAME("pipeline/compilations_surface"), + PNAME("pipeline/compilations_draw"), + PNAME("pipeline/compilations_specialization"), }; return names[p_monitor]; @@ -185,6 +194,16 @@ double Performance::get_monitor(Monitor p_monitor) const { return RS::get_singleton()->get_rendering_info(RS::RENDERING_INFO_TEXTURE_MEM_USED); case RENDER_BUFFER_MEM_USED: return RS::get_singleton()->get_rendering_info(RS::RENDERING_INFO_BUFFER_MEM_USED); + case PIPELINE_COMPILATIONS_CANVAS: + return RS::get_singleton()->get_rendering_info(RS::RENDERING_INFO_PIPELINE_COMPILATIONS_CANVAS); + case PIPELINE_COMPILATIONS_MESH: + return RS::get_singleton()->get_rendering_info(RS::RENDERING_INFO_PIPELINE_COMPILATIONS_MESH); + case PIPELINE_COMPILATIONS_SURFACE: + return RS::get_singleton()->get_rendering_info(RS::RENDERING_INFO_PIPELINE_COMPILATIONS_SURFACE); + case PIPELINE_COMPILATIONS_DRAW: + return RS::get_singleton()->get_rendering_info(RS::RENDERING_INFO_PIPELINE_COMPILATIONS_DRAW); + case PIPELINE_COMPILATIONS_SPECIALIZATION: + return RS::get_singleton()->get_rendering_info(RS::RENDERING_INFO_PIPELINE_COMPILATIONS_SPECIALIZATION); case PHYSICS_2D_ACTIVE_OBJECTS: return PhysicsServer2D::get_singleton()->get_process_info(PhysicsServer2D::INFO_ACTIVE_OBJECTS); case PHYSICS_2D_COLLISION_PAIRS: diff --git a/main/performance.h 
b/main/performance.h index 05d678fe5573..e88bdcb337bd 100644 --- a/main/performance.h +++ b/main/performance.h @@ -101,6 +101,11 @@ class Performance : public Object { NAVIGATION_EDGE_CONNECTION_COUNT, NAVIGATION_EDGE_FREE_COUNT, NAVIGATION_OBSTACLE_COUNT, + PIPELINE_COMPILATIONS_CANVAS, + PIPELINE_COMPILATIONS_MESH, + PIPELINE_COMPILATIONS_SURFACE, + PIPELINE_COMPILATIONS_DRAW, + PIPELINE_COMPILATIONS_SPECIALIZATION, MONITOR_MAX }; diff --git a/scene/register_scene_types.cpp b/scene/register_scene_types.cpp index 09227e260f11..6b1ce2b4ca7f 100644 --- a/scene/register_scene_types.cpp +++ b/scene/register_scene_types.cpp @@ -883,7 +883,6 @@ void register_scene_types() { GDREGISTER_CLASS(ProceduralSkyMaterial); GDREGISTER_CLASS(PanoramaSkyMaterial); GDREGISTER_CLASS(PhysicalSkyMaterial); - SceneTree::add_idle_callback(BaseMaterial3D::flush_changes); BaseMaterial3D::init_shaders(); GDREGISTER_CLASS(MeshLibrary); diff --git a/scene/resources/3d/fog_material.cpp b/scene/resources/3d/fog_material.cpp index 92246b50db63..6c6e98b50dc1 100644 --- a/scene/resources/3d/fog_material.cpp +++ b/scene/resources/3d/fog_material.cpp @@ -168,6 +168,8 @@ void fog() { } FogMaterial::FogMaterial() { + _set_material(RS::get_singleton()->material_create()); + set_density(1.0); set_albedo(Color(1, 1, 1, 1)); set_emission(Color(0, 0, 0, 1)); diff --git a/scene/resources/3d/sky_material.cpp b/scene/resources/3d/sky_material.cpp index c470db5d7f23..10ef516f7a41 100644 --- a/scene/resources/3d/sky_material.cpp +++ b/scene/resources/3d/sky_material.cpp @@ -357,6 +357,7 @@ void sky() { } ProceduralSkyMaterial::ProceduralSkyMaterial() { + _set_material(RS::get_singleton()->material_create()); set_sky_top_color(Color(0.385, 0.454, 0.55)); set_sky_horizon_color(Color(0.6463, 0.6558, 0.6708)); set_sky_curve(0.15); @@ -486,6 +487,7 @@ void sky() { } PanoramaSkyMaterial::PanoramaSkyMaterial() { + _set_material(RS::get_singleton()->material_create()); set_energy_multiplier(1.0); } @@ -785,6 +787,7 @@ 
void sky() { } PhysicalSkyMaterial::PhysicalSkyMaterial() { + _set_material(RS::get_singleton()->material_create()); set_rayleigh_coefficient(2.0); set_rayleigh_color(Color(0.3, 0.405, 0.6)); set_mie_coefficient(0.005); diff --git a/scene/resources/canvas_item_material.cpp b/scene/resources/canvas_item_material.cpp index 76e99aca9291..6f43106ea98f 100644 --- a/scene/resources/canvas_item_material.cpp +++ b/scene/resources/canvas_item_material.cpp @@ -274,6 +274,8 @@ void CanvasItemMaterial::_bind_methods() { CanvasItemMaterial::CanvasItemMaterial() : element(this) { + _set_material(RS::get_singleton()->material_create()); + set_particles_anim_h_frames(1); set_particles_anim_v_frames(1); set_particles_anim_loop(false); diff --git a/scene/resources/material.cpp b/scene/resources/material.cpp index 927e76e4b2d0..2df486575a65 100644 --- a/scene/resources/material.cpp +++ b/scene/resources/material.cpp @@ -46,11 +46,15 @@ void Material::set_next_pass(const Ref &p_pass) { } next_pass = p_pass; - RID next_pass_rid; - if (next_pass.is_valid()) { - next_pass_rid = next_pass->get_rid(); + + if (material.is_valid()) { + RID next_pass_rid; + if (next_pass.is_valid()) { + next_pass_rid = next_pass->get_rid(); + } + + RS::get_singleton()->material_set_next_pass(material, next_pass_rid); } - RS::get_singleton()->material_set_next_pass(material, next_pass_rid); } Ref Material::get_next_pass() const { @@ -61,7 +65,10 @@ void Material::set_render_priority(int p_priority) { ERR_FAIL_COND(p_priority < RENDER_PRIORITY_MIN); ERR_FAIL_COND(p_priority > RENDER_PRIORITY_MAX); render_priority = p_priority; - RS::get_singleton()->material_set_render_priority(material, p_priority); + + if (material.is_valid()) { + RS::get_singleton()->material_set_render_priority(material, p_priority); + } } int Material::get_render_priority() const { @@ -165,13 +172,14 @@ void Material::_bind_methods() { } Material::Material() { - material = RenderingServer::get_singleton()->material_create(); 
render_priority = 0; } Material::~Material() { - ERR_FAIL_NULL(RenderingServer::get_singleton()); - RenderingServer::get_singleton()->free(material); + if (material.is_valid()) { + ERR_FAIL_NULL(RenderingServer::get_singleton()); + RenderingServer::get_singleton()->free(material); + } } /////////////////////////////////// @@ -422,7 +430,11 @@ void ShaderMaterial::set_shader(const Ref &p_shader) { } } - RS::get_singleton()->material_set_shader(_get_material(), rid); + RID material_rid = _get_material(); + if (material_rid.is_valid()) { + RS::get_singleton()->material_set_shader(material_rid, rid); + } + notify_property_list_changed(); //properties for shader exposed emit_changed(); } @@ -432,9 +444,12 @@ Ref ShaderMaterial::get_shader() const { } void ShaderMaterial::set_shader_parameter(const StringName &p_param, const Variant &p_value) { + RID material_rid = _get_material(); if (p_value.get_type() == Variant::NIL) { param_cache.erase(p_param); - RS::get_singleton()->material_set_param(_get_material(), p_param, Variant()); + if (material_rid.is_valid()) { + RS::get_singleton()->material_set_param(material_rid, p_param, Variant()); + } } else { Variant *v = param_cache.getptr(p_param); if (!v) { @@ -449,12 +464,15 @@ void ShaderMaterial::set_shader_parameter(const StringName &p_param, const Varia RID tex_rid = p_value; if (tex_rid == RID()) { param_cache.erase(p_param); - RS::get_singleton()->material_set_param(_get_material(), p_param, Variant()); - } else { - RS::get_singleton()->material_set_param(_get_material(), p_param, tex_rid); + + if (material_rid.is_valid()) { + RS::get_singleton()->material_set_param(material_rid, p_param, Variant()); + } + } else if (material_rid.is_valid()) { + RS::get_singleton()->material_set_param(material_rid, p_param, tex_rid); } - } else { - RS::get_singleton()->material_set_param(_get_material(), p_param, p_value); + } else if (material_rid.is_valid()) { + RS::get_singleton()->material_set_param(material_rid, p_param, p_value); } 
} } @@ -471,6 +489,32 @@ void ShaderMaterial::_shader_changed() { notify_property_list_changed(); //update all properties } +void ShaderMaterial::_check_material_rid() const { + MutexLock lock(material_rid_mutex); + if (_get_material().is_null()) { + RID shader_rid = shader.is_valid() ? shader->get_rid() : RID(); + RID next_pass_rid; + if (get_next_pass().is_valid()) { + next_pass_rid = get_next_pass()->get_rid(); + } + + _set_material(RS::get_singleton()->material_create_from_shader(next_pass_rid, get_render_priority(), shader_rid)); + + for (KeyValue param : param_cache) { + if (param.value.get_type() == Variant::OBJECT) { + RID tex_rid = param.value; + if (tex_rid.is_valid()) { + RS::get_singleton()->material_set_param(_get_material(), param.key, tex_rid); + } else { + RS::get_singleton()->material_set_param(_get_material(), param.key, Variant()); + } + } else { + RS::get_singleton()->material_set_param(_get_material(), param.key, param.value); + } + } + } +} + void ShaderMaterial::_bind_methods() { ClassDB::bind_method(D_METHOD("set_shader", "shader"), &ShaderMaterial::set_shader); ClassDB::bind_method(D_METHOD("get_shader"), &ShaderMaterial::get_shader); @@ -511,6 +555,12 @@ Shader::Mode ShaderMaterial::get_shader_mode() const { return Shader::MODE_SPATIAL; } } + +RID ShaderMaterial::get_rid() const { + _check_material_rid(); + return Material::get_rid(); +} + RID ShaderMaterial::get_shader_rid() const { if (shader.is_valid()) { return shader->get_rid(); @@ -520,6 +570,7 @@ RID ShaderMaterial::get_shader_rid() const { } ShaderMaterial::ShaderMaterial() { + // Material RID will be empty until it is required. 
} ShaderMaterial::~ShaderMaterial() { @@ -527,9 +578,8 @@ ShaderMaterial::~ShaderMaterial() { ///////////////////////////////// -Mutex BaseMaterial3D::material_mutex; -SelfList::List BaseMaterial3D::dirty_materials; HashMap BaseMaterial3D::shader_map; +Mutex BaseMaterial3D::shader_map_mutex; BaseMaterial3D::ShaderNames *BaseMaterial3D::shader_names = nullptr; void BaseMaterial3D::init_shaders() { @@ -619,22 +669,31 @@ HashMap> BaseMaterial3D::materials_for_2d; void BaseMaterial3D::finish_shaders() { materials_for_2d.clear(); - dirty_materials.clear(); - memdelete(shader_names); shader_names = nullptr; } +void BaseMaterial3D::_mark_dirty() { + dirty = true; +} + void BaseMaterial3D::_update_shader() { + if (!dirty) { + return; + } + + dirty = false; + MaterialKey mk = _compute_key(); if (mk == current_key) { return; //no update required in the end } + MutexLock lock(shader_map_mutex); if (shader_map.has(current_key)) { shader_map[current_key].users--; if (shader_map[current_key].users == 0) { - //deallocate shader, as it's no longer in use + // Deallocate shader which is no longer in use. 
RS::get_singleton()->free(shader_map[current_key].shader); shader_map.erase(current_key); } @@ -643,8 +702,13 @@ void BaseMaterial3D::_update_shader() { current_key = mk; if (shader_map.has(mk)) { - RS::get_singleton()->material_set_shader(_get_material(), shader_map[mk].shader); + shader_rid = shader_map[mk].shader; shader_map[mk].users++; + + if (_get_material().is_valid()) { + RS::get_singleton()->material_set_shader(_get_material(), shader_rid); + } + return; } @@ -1866,37 +1930,45 @@ void fragment() {)"; code += "}\n"; ShaderData shader_data; - shader_data.shader = RS::get_singleton()->shader_create(); + shader_data.shader = RS::get_singleton()->shader_create_from_code(code); shader_data.users = 1; - - RS::get_singleton()->shader_set_code(shader_data.shader, code); - shader_map[mk] = shader_data; + shader_rid = shader_data.shader; - RS::get_singleton()->material_set_shader(_get_material(), shader_data.shader); + if (_get_material().is_valid()) { + RS::get_singleton()->material_set_shader(_get_material(), shader_rid); + } } -void BaseMaterial3D::flush_changes() { - MutexLock lock(material_mutex); +void BaseMaterial3D::_check_material_rid() { + MutexLock lock(material_rid_mutex); + if (_get_material().is_null()) { + RID next_pass_rid; + if (get_next_pass().is_valid()) { + next_pass_rid = get_next_pass()->get_rid(); + } + + _set_material(RS::get_singleton()->material_create_from_shader(next_pass_rid, get_render_priority(), shader_rid)); - while (dirty_materials.first()) { - dirty_materials.first()->self()->_update_shader(); - dirty_materials.first()->remove_from_list(); + for (KeyValue param : pending_params) { + RS::get_singleton()->material_set_param(_get_material(), param.key, param.value); + } + + pending_params.clear(); } } -void BaseMaterial3D::_queue_shader_change() { - MutexLock lock(material_mutex); - - if (_is_initialized() && !element.in_list()) { - dirty_materials.add(&element); +void BaseMaterial3D::_material_set_param(const StringName &p_name, const 
Variant &p_value) { + if (_get_material().is_valid()) { + RS::get_singleton()->material_set_param(_get_material(), p_name, p_value); + } else { + pending_params[p_name] = p_value; } } void BaseMaterial3D::set_albedo(const Color &p_albedo) { albedo = p_albedo; - - RS::get_singleton()->material_set_param(_get_material(), shader_names->albedo, p_albedo); + _material_set_param(shader_names->albedo, p_albedo); } Color BaseMaterial3D::get_albedo() const { @@ -1905,7 +1977,7 @@ Color BaseMaterial3D::get_albedo() const { void BaseMaterial3D::set_specular(float p_specular) { specular = p_specular; - RS::get_singleton()->material_set_param(_get_material(), shader_names->specular, p_specular); + _material_set_param(shader_names->specular, p_specular); } float BaseMaterial3D::get_specular() const { @@ -1914,7 +1986,7 @@ float BaseMaterial3D::get_specular() const { void BaseMaterial3D::set_roughness(float p_roughness) { roughness = p_roughness; - RS::get_singleton()->material_set_param(_get_material(), shader_names->roughness, p_roughness); + _material_set_param(shader_names->roughness, p_roughness); } float BaseMaterial3D::get_roughness() const { @@ -1923,7 +1995,7 @@ float BaseMaterial3D::get_roughness() const { void BaseMaterial3D::set_metallic(float p_metallic) { metallic = p_metallic; - RS::get_singleton()->material_set_param(_get_material(), shader_names->metallic, p_metallic); + _material_set_param(shader_names->metallic, p_metallic); } float BaseMaterial3D::get_metallic() const { @@ -1932,7 +2004,7 @@ float BaseMaterial3D::get_metallic() const { void BaseMaterial3D::set_emission(const Color &p_emission) { emission = p_emission; - RS::get_singleton()->material_set_param(_get_material(), shader_names->emission, p_emission); + _material_set_param(shader_names->emission, p_emission); } Color BaseMaterial3D::get_emission() const { @@ -1941,10 +2013,11 @@ Color BaseMaterial3D::get_emission() const { void BaseMaterial3D::set_emission_energy_multiplier(float 
p_emission_energy_multiplier) { emission_energy_multiplier = p_emission_energy_multiplier; + if (GLOBAL_GET("rendering/lights_and_shadows/use_physical_light_units")) { - RS::get_singleton()->material_set_param(_get_material(), shader_names->emission_energy, p_emission_energy_multiplier * emission_intensity); + _material_set_param(shader_names->emission_energy, p_emission_energy_multiplier * emission_intensity); } else { - RS::get_singleton()->material_set_param(_get_material(), shader_names->emission_energy, p_emission_energy_multiplier); + _material_set_param(shader_names->emission_energy, p_emission_energy_multiplier); } } @@ -1955,7 +2028,7 @@ float BaseMaterial3D::get_emission_energy_multiplier() const { void BaseMaterial3D::set_emission_intensity(float p_emission_intensity) { ERR_FAIL_COND_EDMSG(!GLOBAL_GET("rendering/lights_and_shadows/use_physical_light_units"), "Cannot set material emission intensity when Physical Light Units disabled."); emission_intensity = p_emission_intensity; - RS::get_singleton()->material_set_param(_get_material(), shader_names->emission_energy, emission_energy_multiplier * emission_intensity); + _material_set_param(shader_names->emission_energy, emission_energy_multiplier * emission_intensity); } float BaseMaterial3D::get_emission_intensity() const { @@ -1964,7 +2037,7 @@ float BaseMaterial3D::get_emission_intensity() const { void BaseMaterial3D::set_normal_scale(float p_normal_scale) { normal_scale = p_normal_scale; - RS::get_singleton()->material_set_param(_get_material(), shader_names->normal_scale, p_normal_scale); + _material_set_param(shader_names->normal_scale, p_normal_scale); } float BaseMaterial3D::get_normal_scale() const { @@ -1973,7 +2046,7 @@ float BaseMaterial3D::get_normal_scale() const { void BaseMaterial3D::set_rim(float p_rim) { rim = p_rim; - RS::get_singleton()->material_set_param(_get_material(), shader_names->rim, p_rim); + _material_set_param(shader_names->rim, p_rim); } float BaseMaterial3D::get_rim() const 
{ @@ -1982,7 +2055,7 @@ float BaseMaterial3D::get_rim() const { void BaseMaterial3D::set_rim_tint(float p_rim_tint) { rim_tint = p_rim_tint; - RS::get_singleton()->material_set_param(_get_material(), shader_names->rim_tint, p_rim_tint); + _material_set_param(shader_names->rim_tint, p_rim_tint); } float BaseMaterial3D::get_rim_tint() const { @@ -1991,7 +2064,7 @@ float BaseMaterial3D::get_rim_tint() const { void BaseMaterial3D::set_ao_light_affect(float p_ao_light_affect) { ao_light_affect = p_ao_light_affect; - RS::get_singleton()->material_set_param(_get_material(), shader_names->ao_light_affect, p_ao_light_affect); + _material_set_param(shader_names->ao_light_affect, p_ao_light_affect); } float BaseMaterial3D::get_ao_light_affect() const { @@ -2000,7 +2073,7 @@ float BaseMaterial3D::get_ao_light_affect() const { void BaseMaterial3D::set_clearcoat(float p_clearcoat) { clearcoat = p_clearcoat; - RS::get_singleton()->material_set_param(_get_material(), shader_names->clearcoat, p_clearcoat); + _material_set_param(shader_names->clearcoat, p_clearcoat); } float BaseMaterial3D::get_clearcoat() const { @@ -2009,7 +2082,7 @@ float BaseMaterial3D::get_clearcoat() const { void BaseMaterial3D::set_clearcoat_roughness(float p_clearcoat_roughness) { clearcoat_roughness = p_clearcoat_roughness; - RS::get_singleton()->material_set_param(_get_material(), shader_names->clearcoat_roughness, p_clearcoat_roughness); + _material_set_param(shader_names->clearcoat_roughness, p_clearcoat_roughness); } float BaseMaterial3D::get_clearcoat_roughness() const { @@ -2018,7 +2091,7 @@ float BaseMaterial3D::get_clearcoat_roughness() const { void BaseMaterial3D::set_anisotropy(float p_anisotropy) { anisotropy = p_anisotropy; - RS::get_singleton()->material_set_param(_get_material(), shader_names->anisotropy, p_anisotropy); + _material_set_param(shader_names->anisotropy, p_anisotropy); } float BaseMaterial3D::get_anisotropy() const { @@ -2027,7 +2100,7 @@ float BaseMaterial3D::get_anisotropy() 
const { void BaseMaterial3D::set_heightmap_scale(float p_heightmap_scale) { heightmap_scale = p_heightmap_scale; - RS::get_singleton()->material_set_param(_get_material(), shader_names->heightmap_scale, p_heightmap_scale); + _material_set_param(shader_names->heightmap_scale, p_heightmap_scale); } float BaseMaterial3D::get_heightmap_scale() const { @@ -2036,7 +2109,7 @@ float BaseMaterial3D::get_heightmap_scale() const { void BaseMaterial3D::set_subsurface_scattering_strength(float p_subsurface_scattering_strength) { subsurface_scattering_strength = p_subsurface_scattering_strength; - RS::get_singleton()->material_set_param(_get_material(), shader_names->subsurface_scattering_strength, subsurface_scattering_strength); + _material_set_param(shader_names->subsurface_scattering_strength, subsurface_scattering_strength); } float BaseMaterial3D::get_subsurface_scattering_strength() const { @@ -2045,7 +2118,7 @@ float BaseMaterial3D::get_subsurface_scattering_strength() const { void BaseMaterial3D::set_transmittance_color(const Color &p_color) { transmittance_color = p_color; - RS::get_singleton()->material_set_param(_get_material(), shader_names->transmittance_color, p_color); + _material_set_param(shader_names->transmittance_color, p_color); } Color BaseMaterial3D::get_transmittance_color() const { @@ -2054,7 +2127,7 @@ Color BaseMaterial3D::get_transmittance_color() const { void BaseMaterial3D::set_transmittance_depth(float p_depth) { transmittance_depth = p_depth; - RS::get_singleton()->material_set_param(_get_material(), shader_names->transmittance_depth, p_depth); + _material_set_param(shader_names->transmittance_depth, p_depth); } float BaseMaterial3D::get_transmittance_depth() const { @@ -2063,7 +2136,7 @@ float BaseMaterial3D::get_transmittance_depth() const { void BaseMaterial3D::set_transmittance_boost(float p_boost) { transmittance_boost = p_boost; - RS::get_singleton()->material_set_param(_get_material(), shader_names->transmittance_boost, p_boost); + 
_material_set_param(shader_names->transmittance_boost, p_boost); } float BaseMaterial3D::get_transmittance_boost() const { @@ -2072,7 +2145,7 @@ float BaseMaterial3D::get_transmittance_boost() const { void BaseMaterial3D::set_backlight(const Color &p_backlight) { backlight = p_backlight; - RS::get_singleton()->material_set_param(_get_material(), shader_names->backlight, backlight); + _material_set_param(shader_names->backlight, backlight); } Color BaseMaterial3D::get_backlight() const { @@ -2081,7 +2154,7 @@ Color BaseMaterial3D::get_backlight() const { void BaseMaterial3D::set_refraction(float p_refraction) { refraction = p_refraction; - RS::get_singleton()->material_set_param(_get_material(), shader_names->refraction, refraction); + _material_set_param(shader_names->refraction, refraction); } float BaseMaterial3D::get_refraction() const { @@ -2094,7 +2167,7 @@ void BaseMaterial3D::set_detail_uv(DetailUV p_detail_uv) { } detail_uv = p_detail_uv; - _queue_shader_change(); + _mark_dirty(); } BaseMaterial3D::DetailUV BaseMaterial3D::get_detail_uv() const { @@ -2107,7 +2180,7 @@ void BaseMaterial3D::set_blend_mode(BlendMode p_mode) { } blend_mode = p_mode; - _queue_shader_change(); + _mark_dirty(); } BaseMaterial3D::BlendMode BaseMaterial3D::get_blend_mode() const { @@ -2116,7 +2189,7 @@ BaseMaterial3D::BlendMode BaseMaterial3D::get_blend_mode() const { void BaseMaterial3D::set_detail_blend_mode(BlendMode p_mode) { detail_blend_mode = p_mode; - _queue_shader_change(); + _mark_dirty(); } BaseMaterial3D::BlendMode BaseMaterial3D::get_detail_blend_mode() const { @@ -2129,7 +2202,7 @@ void BaseMaterial3D::set_transparency(Transparency p_transparency) { } transparency = p_transparency; - _queue_shader_change(); + _mark_dirty(); notify_property_list_changed(); } @@ -2143,7 +2216,7 @@ void BaseMaterial3D::set_alpha_antialiasing(AlphaAntiAliasing p_alpha_aa) { } alpha_antialiasing_mode = p_alpha_aa; - _queue_shader_change(); + _mark_dirty(); notify_property_list_changed(); } 
@@ -2157,7 +2230,7 @@ void BaseMaterial3D::set_shading_mode(ShadingMode p_shading_mode) { } shading_mode = p_shading_mode; - _queue_shader_change(); + _mark_dirty(); notify_property_list_changed(); } @@ -2171,7 +2244,7 @@ void BaseMaterial3D::set_depth_draw_mode(DepthDrawMode p_mode) { } depth_draw_mode = p_mode; - _queue_shader_change(); + _mark_dirty(); } BaseMaterial3D::DepthDrawMode BaseMaterial3D::get_depth_draw_mode() const { @@ -2184,7 +2257,7 @@ void BaseMaterial3D::set_cull_mode(CullMode p_mode) { } cull_mode = p_mode; - _queue_shader_change(); + _mark_dirty(); } BaseMaterial3D::CullMode BaseMaterial3D::get_cull_mode() const { @@ -2197,7 +2270,7 @@ void BaseMaterial3D::set_diffuse_mode(DiffuseMode p_mode) { } diffuse_mode = p_mode; - _queue_shader_change(); + _mark_dirty(); } BaseMaterial3D::DiffuseMode BaseMaterial3D::get_diffuse_mode() const { @@ -2210,7 +2283,7 @@ void BaseMaterial3D::set_specular_mode(SpecularMode p_mode) { } specular_mode = p_mode; - _queue_shader_change(); + _mark_dirty(); } BaseMaterial3D::SpecularMode BaseMaterial3D::get_specular_mode() const { @@ -2240,7 +2313,7 @@ void BaseMaterial3D::set_flag(Flags p_flag, bool p_enabled) { update_configuration_warning(); } - _queue_shader_change(); + _mark_dirty(); } bool BaseMaterial3D::get_flag(Flags p_flag) const { @@ -2256,7 +2329,7 @@ void BaseMaterial3D::set_feature(Feature p_feature, bool p_enabled) { features[p_feature] = p_enabled; notify_property_list_changed(); - _queue_shader_change(); + _mark_dirty(); } bool BaseMaterial3D::get_feature(Feature p_feature) const { @@ -2269,15 +2342,14 @@ void BaseMaterial3D::set_texture(TextureParam p_param, const Ref &p_t textures[p_param] = p_texture; Variant rid = p_texture.is_valid() ? 
Variant(p_texture->get_rid()) : Variant(); - RS::get_singleton()->material_set_param(_get_material(), shader_names->texture_names[p_param], rid); + _material_set_param(shader_names->texture_names[p_param], rid); if (p_texture.is_valid() && p_param == TEXTURE_ALBEDO) { - RS::get_singleton()->material_set_param(_get_material(), shader_names->albedo_texture_size, - Vector2i(p_texture->get_width(), p_texture->get_height())); + _material_set_param(shader_names->albedo_texture_size, Vector2i(p_texture->get_width(), p_texture->get_height())); } notify_property_list_changed(); - _queue_shader_change(); + _mark_dirty(); } Ref BaseMaterial3D::get_texture(TextureParam p_param) const { @@ -2297,7 +2369,7 @@ Ref BaseMaterial3D::get_texture_by_name(const StringName &p_name) con void BaseMaterial3D::set_texture_filter(TextureFilter p_filter) { texture_filter = p_filter; - _queue_shader_change(); + _mark_dirty(); } BaseMaterial3D::TextureFilter BaseMaterial3D::get_texture_filter() const { @@ -2469,7 +2541,7 @@ void BaseMaterial3D::_validate_property(PropertyInfo &p_property) const { void BaseMaterial3D::set_point_size(float p_point_size) { point_size = p_point_size; - RS::get_singleton()->material_set_param(_get_material(), shader_names->point_size, p_point_size); + _material_set_param(shader_names->point_size, p_point_size); } float BaseMaterial3D::get_point_size() const { @@ -2478,7 +2550,7 @@ float BaseMaterial3D::get_point_size() const { void BaseMaterial3D::set_uv1_scale(const Vector3 &p_scale) { uv1_scale = p_scale; - RS::get_singleton()->material_set_param(_get_material(), shader_names->uv1_scale, p_scale); + _material_set_param(shader_names->uv1_scale, p_scale); } Vector3 BaseMaterial3D::get_uv1_scale() const { @@ -2487,7 +2559,7 @@ Vector3 BaseMaterial3D::get_uv1_scale() const { void BaseMaterial3D::set_uv1_offset(const Vector3 &p_offset) { uv1_offset = p_offset; - RS::get_singleton()->material_set_param(_get_material(), shader_names->uv1_offset, p_offset); + 
_material_set_param(shader_names->uv1_offset, p_offset); } Vector3 BaseMaterial3D::get_uv1_offset() const { @@ -2497,7 +2569,7 @@ Vector3 BaseMaterial3D::get_uv1_offset() const { void BaseMaterial3D::set_uv1_triplanar_blend_sharpness(float p_sharpness) { // Negative values or values higher than 150 can result in NaNs, leading to broken rendering. uv1_triplanar_sharpness = CLAMP(p_sharpness, 0.0, 150.0); - RS::get_singleton()->material_set_param(_get_material(), shader_names->uv1_blend_sharpness, uv1_triplanar_sharpness); + _material_set_param(shader_names->uv1_blend_sharpness, uv1_triplanar_sharpness); } float BaseMaterial3D::get_uv1_triplanar_blend_sharpness() const { @@ -2506,7 +2578,7 @@ float BaseMaterial3D::get_uv1_triplanar_blend_sharpness() const { void BaseMaterial3D::set_uv2_scale(const Vector3 &p_scale) { uv2_scale = p_scale; - RS::get_singleton()->material_set_param(_get_material(), shader_names->uv2_scale, p_scale); + _material_set_param(shader_names->uv2_scale, p_scale); } Vector3 BaseMaterial3D::get_uv2_scale() const { @@ -2515,7 +2587,7 @@ Vector3 BaseMaterial3D::get_uv2_scale() const { void BaseMaterial3D::set_uv2_offset(const Vector3 &p_offset) { uv2_offset = p_offset; - RS::get_singleton()->material_set_param(_get_material(), shader_names->uv2_offset, p_offset); + _material_set_param(shader_names->uv2_offset, p_offset); } Vector3 BaseMaterial3D::get_uv2_offset() const { @@ -2525,7 +2597,7 @@ Vector3 BaseMaterial3D::get_uv2_offset() const { void BaseMaterial3D::set_uv2_triplanar_blend_sharpness(float p_sharpness) { // Negative values or values higher than 150 can result in NaNs, leading to broken rendering. 
uv2_triplanar_sharpness = CLAMP(p_sharpness, 0.0, 150.0); - RS::get_singleton()->material_set_param(_get_material(), shader_names->uv2_blend_sharpness, uv2_triplanar_sharpness); + _material_set_param(shader_names->uv2_blend_sharpness, uv2_triplanar_sharpness); } float BaseMaterial3D::get_uv2_triplanar_blend_sharpness() const { @@ -2534,7 +2606,7 @@ float BaseMaterial3D::get_uv2_triplanar_blend_sharpness() const { void BaseMaterial3D::set_billboard_mode(BillboardMode p_mode) { billboard_mode = p_mode; - _queue_shader_change(); + _mark_dirty(); notify_property_list_changed(); } @@ -2544,7 +2616,7 @@ BaseMaterial3D::BillboardMode BaseMaterial3D::get_billboard_mode() const { void BaseMaterial3D::set_particles_anim_h_frames(int p_frames) { particles_anim_h_frames = p_frames; - RS::get_singleton()->material_set_param(_get_material(), shader_names->particles_anim_h_frames, p_frames); + _material_set_param(shader_names->particles_anim_h_frames, p_frames); } int BaseMaterial3D::get_particles_anim_h_frames() const { @@ -2553,7 +2625,7 @@ int BaseMaterial3D::get_particles_anim_h_frames() const { void BaseMaterial3D::set_particles_anim_v_frames(int p_frames) { particles_anim_v_frames = p_frames; - RS::get_singleton()->material_set_param(_get_material(), shader_names->particles_anim_v_frames, p_frames); + _material_set_param(shader_names->particles_anim_v_frames, p_frames); } int BaseMaterial3D::get_particles_anim_v_frames() const { @@ -2562,7 +2634,7 @@ int BaseMaterial3D::get_particles_anim_v_frames() const { void BaseMaterial3D::set_particles_anim_loop(bool p_loop) { particles_anim_loop = p_loop; - RS::get_singleton()->material_set_param(_get_material(), shader_names->particles_anim_loop, particles_anim_loop); + _material_set_param(shader_names->particles_anim_loop, particles_anim_loop); } bool BaseMaterial3D::get_particles_anim_loop() const { @@ -2571,7 +2643,7 @@ bool BaseMaterial3D::get_particles_anim_loop() const { void BaseMaterial3D::set_heightmap_deep_parallax(bool 
p_enable) { deep_parallax = p_enable; - _queue_shader_change(); + _mark_dirty(); notify_property_list_changed(); } @@ -2581,7 +2653,7 @@ bool BaseMaterial3D::is_heightmap_deep_parallax_enabled() const { void BaseMaterial3D::set_heightmap_deep_parallax_min_layers(int p_layer) { deep_parallax_min_layers = p_layer; - RS::get_singleton()->material_set_param(_get_material(), shader_names->heightmap_min_layers, p_layer); + _material_set_param(shader_names->heightmap_min_layers, p_layer); } int BaseMaterial3D::get_heightmap_deep_parallax_min_layers() const { @@ -2590,7 +2662,7 @@ int BaseMaterial3D::get_heightmap_deep_parallax_min_layers() const { void BaseMaterial3D::set_heightmap_deep_parallax_max_layers(int p_layer) { deep_parallax_max_layers = p_layer; - RS::get_singleton()->material_set_param(_get_material(), shader_names->heightmap_max_layers, p_layer); + _material_set_param(shader_names->heightmap_max_layers, p_layer); } int BaseMaterial3D::get_heightmap_deep_parallax_max_layers() const { @@ -2599,7 +2671,7 @@ int BaseMaterial3D::get_heightmap_deep_parallax_max_layers() const { void BaseMaterial3D::set_heightmap_deep_parallax_flip_tangent(bool p_flip) { heightmap_parallax_flip_tangent = p_flip; - RS::get_singleton()->material_set_param(_get_material(), shader_names->heightmap_flip, Vector2(heightmap_parallax_flip_tangent ? -1 : 1, heightmap_parallax_flip_binormal ? -1 : 1)); + _material_set_param(shader_names->heightmap_flip, Vector2(heightmap_parallax_flip_tangent ? -1 : 1, heightmap_parallax_flip_binormal ? -1 : 1)); } bool BaseMaterial3D::get_heightmap_deep_parallax_flip_tangent() const { @@ -2608,7 +2680,7 @@ bool BaseMaterial3D::get_heightmap_deep_parallax_flip_tangent() const { void BaseMaterial3D::set_heightmap_deep_parallax_flip_binormal(bool p_flip) { heightmap_parallax_flip_binormal = p_flip; - RS::get_singleton()->material_set_param(_get_material(), shader_names->heightmap_flip, Vector2(heightmap_parallax_flip_tangent ? 
-1 : 1, heightmap_parallax_flip_binormal ? -1 : 1)); + _material_set_param(shader_names->heightmap_flip, Vector2(heightmap_parallax_flip_tangent ? -1 : 1, heightmap_parallax_flip_binormal ? -1 : 1)); } bool BaseMaterial3D::get_heightmap_deep_parallax_flip_binormal() const { @@ -2617,7 +2689,7 @@ bool BaseMaterial3D::get_heightmap_deep_parallax_flip_binormal() const { void BaseMaterial3D::set_grow_enabled(bool p_enable) { grow_enabled = p_enable; - _queue_shader_change(); + _mark_dirty(); notify_property_list_changed(); } @@ -2627,7 +2699,7 @@ bool BaseMaterial3D::is_grow_enabled() const { void BaseMaterial3D::set_alpha_scissor_threshold(float p_threshold) { alpha_scissor_threshold = p_threshold; - RS::get_singleton()->material_set_param(_get_material(), shader_names->alpha_scissor_threshold, p_threshold); + _material_set_param(shader_names->alpha_scissor_threshold, p_threshold); } float BaseMaterial3D::get_alpha_scissor_threshold() const { @@ -2636,7 +2708,7 @@ float BaseMaterial3D::get_alpha_scissor_threshold() const { void BaseMaterial3D::set_alpha_hash_scale(float p_scale) { alpha_hash_scale = p_scale; - RS::get_singleton()->material_set_param(_get_material(), shader_names->alpha_hash_scale, p_scale); + _material_set_param(shader_names->alpha_hash_scale, p_scale); } float BaseMaterial3D::get_alpha_hash_scale() const { @@ -2645,7 +2717,7 @@ float BaseMaterial3D::get_alpha_hash_scale() const { void BaseMaterial3D::set_alpha_antialiasing_edge(float p_edge) { alpha_antialiasing_edge = p_edge; - RS::get_singleton()->material_set_param(_get_material(), shader_names->alpha_antialiasing_edge, p_edge); + _material_set_param(shader_names->alpha_antialiasing_edge, p_edge); } float BaseMaterial3D::get_alpha_antialiasing_edge() const { @@ -2654,7 +2726,7 @@ float BaseMaterial3D::get_alpha_antialiasing_edge() const { void BaseMaterial3D::set_grow(float p_grow) { grow = p_grow; - RS::get_singleton()->material_set_param(_get_material(), shader_names->grow, p_grow); + 
_material_set_param(shader_names->grow, p_grow); } float BaseMaterial3D::get_grow() const { @@ -2676,7 +2748,7 @@ static Plane _get_texture_mask(BaseMaterial3D::TextureChannel p_channel) { void BaseMaterial3D::set_metallic_texture_channel(TextureChannel p_channel) { ERR_FAIL_INDEX(p_channel, 5); metallic_texture_channel = p_channel; - RS::get_singleton()->material_set_param(_get_material(), shader_names->metallic_texture_channel, _get_texture_mask(p_channel)); + _material_set_param(shader_names->metallic_texture_channel, _get_texture_mask(p_channel)); } BaseMaterial3D::TextureChannel BaseMaterial3D::get_metallic_texture_channel() const { @@ -2686,7 +2758,7 @@ BaseMaterial3D::TextureChannel BaseMaterial3D::get_metallic_texture_channel() co void BaseMaterial3D::set_roughness_texture_channel(TextureChannel p_channel) { ERR_FAIL_INDEX(p_channel, 5); roughness_texture_channel = p_channel; - _queue_shader_change(); + _mark_dirty(); } BaseMaterial3D::TextureChannel BaseMaterial3D::get_roughness_texture_channel() const { @@ -2696,7 +2768,7 @@ BaseMaterial3D::TextureChannel BaseMaterial3D::get_roughness_texture_channel() c void BaseMaterial3D::set_ao_texture_channel(TextureChannel p_channel) { ERR_FAIL_INDEX(p_channel, 5); ao_texture_channel = p_channel; - RS::get_singleton()->material_set_param(_get_material(), shader_names->ao_texture_channel, _get_texture_mask(p_channel)); + _material_set_param(shader_names->ao_texture_channel, _get_texture_mask(p_channel)); } BaseMaterial3D::TextureChannel BaseMaterial3D::get_ao_texture_channel() const { @@ -2706,7 +2778,7 @@ BaseMaterial3D::TextureChannel BaseMaterial3D::get_ao_texture_channel() const { void BaseMaterial3D::set_refraction_texture_channel(TextureChannel p_channel) { ERR_FAIL_INDEX(p_channel, 5); refraction_texture_channel = p_channel; - RS::get_singleton()->material_set_param(_get_material(), shader_names->refraction_texture_channel, _get_texture_mask(p_channel)); + 
_material_set_param(shader_names->refraction_texture_channel, _get_texture_mask(p_channel)); } BaseMaterial3D::TextureChannel BaseMaterial3D::get_refraction_texture_channel() const { @@ -2768,7 +2840,7 @@ void BaseMaterial3D::set_on_top_of_alpha() { void BaseMaterial3D::set_proximity_fade_enabled(bool p_enable) { proximity_fade_enabled = p_enable; - _queue_shader_change(); + _mark_dirty(); notify_property_list_changed(); } @@ -2778,7 +2850,7 @@ bool BaseMaterial3D::is_proximity_fade_enabled() const { void BaseMaterial3D::set_proximity_fade_distance(float p_distance) { proximity_fade_distance = MAX(p_distance, 0.01); - RS::get_singleton()->material_set_param(_get_material(), shader_names->proximity_fade_distance, proximity_fade_distance); + _material_set_param(shader_names->proximity_fade_distance, proximity_fade_distance); } float BaseMaterial3D::get_proximity_fade_distance() const { @@ -2787,7 +2859,7 @@ float BaseMaterial3D::get_proximity_fade_distance() const { void BaseMaterial3D::set_msdf_pixel_range(float p_range) { msdf_pixel_range = p_range; - RS::get_singleton()->material_set_param(_get_material(), shader_names->msdf_pixel_range, p_range); + _material_set_param(shader_names->msdf_pixel_range, p_range); } float BaseMaterial3D::get_msdf_pixel_range() const { @@ -2796,7 +2868,7 @@ float BaseMaterial3D::get_msdf_pixel_range() const { void BaseMaterial3D::set_msdf_outline_size(float p_size) { msdf_outline_size = p_size; - RS::get_singleton()->material_set_param(_get_material(), shader_names->msdf_outline_size, p_size); + _material_set_param(shader_names->msdf_outline_size, p_size); } float BaseMaterial3D::get_msdf_outline_size() const { @@ -2805,7 +2877,7 @@ float BaseMaterial3D::get_msdf_outline_size() const { void BaseMaterial3D::set_distance_fade(DistanceFadeMode p_mode) { distance_fade = p_mode; - _queue_shader_change(); + _mark_dirty(); notify_property_list_changed(); } @@ -2815,7 +2887,7 @@ BaseMaterial3D::DistanceFadeMode 
BaseMaterial3D::get_distance_fade() const { void BaseMaterial3D::set_distance_fade_max_distance(float p_distance) { distance_fade_max_distance = p_distance; - RS::get_singleton()->material_set_param(_get_material(), shader_names->distance_fade_max, distance_fade_max_distance); + _material_set_param(shader_names->distance_fade_max, distance_fade_max_distance); } float BaseMaterial3D::get_distance_fade_max_distance() const { @@ -2824,7 +2896,7 @@ float BaseMaterial3D::get_distance_fade_max_distance() const { void BaseMaterial3D::set_distance_fade_min_distance(float p_distance) { distance_fade_min_distance = p_distance; - RS::get_singleton()->material_set_param(_get_material(), shader_names->distance_fade_min, distance_fade_min_distance); + _material_set_param(shader_names->distance_fade_min, distance_fade_min_distance); } float BaseMaterial3D::get_distance_fade_min_distance() const { @@ -2836,20 +2908,22 @@ void BaseMaterial3D::set_emission_operator(EmissionOperator p_op) { return; } emission_op = p_op; - _queue_shader_change(); + _mark_dirty(); } BaseMaterial3D::EmissionOperator BaseMaterial3D::get_emission_operator() const { return emission_op; } +RID BaseMaterial3D::get_rid() const { + const_cast(this)->_update_shader(); + const_cast(this)->_check_material_rid(); + return _get_material(); +} + RID BaseMaterial3D::get_shader_rid() const { - MutexLock lock(material_mutex); - if (element.in_list()) { - ((BaseMaterial3D *)this)->_update_shader(); - } - ERR_FAIL_COND_V(!shader_map.has(current_key), RID()); - return shader_map[current_key].shader; + const_cast(this)->_update_shader(); + return shader_rid; } Shader::Mode BaseMaterial3D::get_shader_mode() const { @@ -3365,8 +3439,7 @@ void BaseMaterial3D::_bind_methods() { BIND_ENUM_CONSTANT(DISTANCE_FADE_OBJECT_DITHER); } -BaseMaterial3D::BaseMaterial3D(bool p_orm) : - element(this) { +BaseMaterial3D::BaseMaterial3D(bool p_orm) { orm = p_orm; // Initialize to the same values as the shader set_albedo(Color(1.0, 1.0, 1.0, 
1.0)); @@ -3433,21 +3506,25 @@ BaseMaterial3D::BaseMaterial3D(bool p_orm) : current_key.invalid_key = 1; - _mark_initialized(callable_mp(this, &BaseMaterial3D::_queue_shader_change), callable_mp(this, &BaseMaterial3D::_update_shader)); + _mark_dirty(); } BaseMaterial3D::~BaseMaterial3D() { - ERR_FAIL_NULL(RenderingServer::get_singleton()); - MutexLock lock(material_mutex); + ERR_FAIL_NULL(RS::get_singleton()); - if (shader_map.has(current_key)) { - shader_map[current_key].users--; - if (shader_map[current_key].users == 0) { - //deallocate shader, as it's no longer in use - RS::get_singleton()->free(shader_map[current_key].shader); - shader_map.erase(current_key); + { + MutexLock lock(shader_map_mutex); + if (shader_map.has(current_key)) { + shader_map[current_key].users--; + if (shader_map[current_key].users == 0) { + // Deallocate shader which is no longer in use. + RS::get_singleton()->free(shader_map[current_key].shader); + shader_map.erase(current_key); + } } + } + if (_get_material().is_valid()) { RS::get_singleton()->material_set_shader(_get_material(), RID()); } } diff --git a/scene/resources/material.h b/scene/resources/material.h index 50a774e961f8..c20df24178e1 100644 --- a/scene/resources/material.h +++ b/scene/resources/material.h @@ -42,7 +42,7 @@ class Material : public Resource { RES_BASE_EXTENSION("material") OBJ_SAVE_TYPE(Material); - RID material; + mutable RID material; Ref next_pass; int render_priority; @@ -55,6 +55,7 @@ class Material : public Resource { void inspect_native_shader_code(); protected: + _FORCE_INLINE_ void _set_material(RID p_material) const { material = p_material; } _FORCE_INLINE_ RID _get_material() const { return material; } static void _bind_methods(); virtual bool _can_do_next_pass() const; @@ -97,6 +98,7 @@ class ShaderMaterial : public Material { mutable HashMap remap_cache; mutable HashMap param_cache; + mutable Mutex material_rid_mutex; protected: bool _set(const StringName &p_name, const Variant &p_value); @@ -115,6 
+117,7 @@ class ShaderMaterial : public Material { virtual bool _can_use_render_priority() const override; void _shader_changed(); + void _check_material_rid() const; public: void set_shader(const Ref &p_shader); @@ -125,6 +128,7 @@ class ShaderMaterial : public Material { virtual Shader::Mode get_shader_mode() const override; + virtual RID get_rid() const override; virtual RID get_shader_rid() const override; ShaderMaterial(); @@ -136,6 +140,9 @@ class StandardMaterial3D; class BaseMaterial3D : public Material { GDCLASS(BaseMaterial3D, Material); +private: + mutable Mutex material_rid_mutex; + public: enum TextureParam { TEXTURE_ALBEDO, @@ -361,6 +368,7 @@ class BaseMaterial3D : public Material { }; static HashMap shader_map; + static Mutex shader_map_mutex; MaterialKey current_key; @@ -459,16 +467,17 @@ class BaseMaterial3D : public Material { StringName albedo_texture_size; }; - static Mutex material_mutex; - static SelfList::List dirty_materials; static ShaderNames *shader_names; - SelfList element; - + void _mark_dirty(); void _update_shader(); - _FORCE_INLINE_ void _queue_shader_change(); + void _check_material_rid(); + void _material_set_param(const StringName &p_name, const Variant &p_value); bool orm; + bool dirty = true; + RID shader_rid; + HashMap pending_params; Color albedo; float specular = 0.0f; @@ -771,10 +780,10 @@ class BaseMaterial3D : public Material { static void init_shaders(); static void finish_shaders(); - static void flush_changes(); static Ref get_material_for_2d(bool p_shaded, Transparency p_transparency, bool p_double_sided, bool p_billboard = false, bool p_billboard_y = false, bool p_msdf = false, bool p_no_depth = false, bool p_fixed_size = false, TextureFilter p_filter = TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, AlphaAntiAliasing p_alpha_antialiasing_mode = ALPHA_ANTIALIASING_OFF, RID *r_shader_rid = nullptr); + virtual RID get_rid() const override; virtual RID get_shader_rid() const override; virtual Shader::Mode get_shader_mode() const 
override; diff --git a/scene/resources/particle_process_material.cpp b/scene/resources/particle_process_material.cpp index 8cfe4c92b72e..09bc1fa8e425 100644 --- a/scene/resources/particle_process_material.cpp +++ b/scene/resources/particle_process_material.cpp @@ -2261,6 +2261,8 @@ void ParticleProcessMaterial::_bind_methods() { ParticleProcessMaterial::ParticleProcessMaterial() : element(this) { + _set_material(RS::get_singleton()->material_create()); + set_direction(Vector3(1, 0, 0)); set_spread(45); set_flatness(0); diff --git a/scene/resources/shader.cpp b/scene/resources/shader.cpp index 46d38146a67f..01db1968d21f 100644 --- a/scene/resources/shader.cpp +++ b/scene/resources/shader.cpp @@ -50,6 +50,14 @@ Shader::Mode Shader::get_mode() const { return mode; } +void Shader::_check_shader_rid() const { + MutexLock lock(shader_rid_mutex); + if (shader_rid.is_null() && !pp_code.is_empty()) { + shader_rid = RenderingServer::get_singleton()->shader_create_from_code(pp_code, get_path()); + pp_code = String(); + } +} + void Shader::_dependency_changed() { // Preprocess and compile the code again because a dependency has changed. It also calls emit_changed() for us. 
_recompile(); @@ -61,7 +69,10 @@ void Shader::_recompile() { void Shader::set_path(const String &p_path, bool p_take_over) { Resource::set_path(p_path, p_take_over); - RS::get_singleton()->shader_set_path_hint(shader, p_path); + + if (shader_rid.is_valid()) { + RS::get_singleton()->shader_set_path_hint(shader_rid, p_path); + } } void Shader::set_include_path(const String &p_path) { @@ -76,7 +87,7 @@ void Shader::set_code(const String &p_code) { } code = p_code; - String pp_code = p_code; + pp_code = p_code; { String path = get_path(); @@ -114,7 +125,10 @@ void Shader::set_code(const String &p_code) { E->connect_changed(callable_mp(this, &Shader::_dependency_changed)); } - RenderingServer::get_singleton()->shader_set_code(shader, pp_code); + if (shader_rid.is_valid()) { + RenderingServer::get_singleton()->shader_set_code(shader_rid, pp_code); + pp_code = String(); + } emit_changed(); } @@ -126,9 +140,10 @@ String Shader::get_code() const { void Shader::get_shader_uniform_list(List *p_params, bool p_get_groups) const { _update_shader(); + _check_shader_rid(); List local; - RenderingServer::get_singleton()->get_shader_parameter_list(shader, &local); + RenderingServer::get_singleton()->get_shader_parameter_list(shader_rid, &local); #ifdef TOOLS_ENABLED DocData::ClassDoc class_doc; @@ -182,17 +197,20 @@ void Shader::get_shader_uniform_list(List *p_params, bool p_get_gr RID Shader::get_rid() const { _update_shader(); + _check_shader_rid(); - return shader; + return shader_rid; } void Shader::set_default_texture_parameter(const StringName &p_name, const Ref &p_texture, int p_index) { + _check_shader_rid(); + if (p_texture.is_valid()) { if (!default_textures.has(p_name)) { default_textures[p_name] = HashMap>(); } default_textures[p_name][p_index] = p_texture; - RS::get_singleton()->shader_set_default_texture_parameter(shader, p_name, p_texture->get_rid(), p_index); + RS::get_singleton()->shader_set_default_texture_parameter(shader_rid, p_name, p_texture->get_rid(), 
p_index); } else { if (default_textures.has(p_name) && default_textures[p_name].has(p_index)) { default_textures[p_name].erase(p_index); @@ -201,7 +219,7 @@ void Shader::set_default_texture_parameter(const StringName &p_name, const Refshader_set_default_texture_parameter(shader, p_name, RID(), p_index); + RS::get_singleton()->shader_set_default_texture_parameter(shader_rid, p_name, RID(), p_index); } emit_changed(); @@ -225,6 +243,7 @@ bool Shader::is_text_shader() const { } void Shader::_update_shader() const { + // Base implementation does nothing. } Array Shader::_get_shader_uniform_list(bool p_get_groups) { @@ -258,12 +277,14 @@ void Shader::_bind_methods() { } Shader::Shader() { - shader = RenderingServer::get_singleton()->shader_create(); + // Shader RID will be empty until it is required. } Shader::~Shader() { - ERR_FAIL_NULL(RenderingServer::get_singleton()); - RenderingServer::get_singleton()->free(shader); + if (shader_rid.is_valid()) { + ERR_FAIL_NULL(RenderingServer::get_singleton()); + RenderingServer::get_singleton()->free(shader_rid); + } } //////////// diff --git a/scene/resources/shader.h b/scene/resources/shader.h index 682fbd7ea6a7..2ffe598f0cb7 100644 --- a/scene/resources/shader.h +++ b/scene/resources/shader.h @@ -52,7 +52,10 @@ class Shader : public Resource { }; private: - RID shader; + mutable RID shader_rid; + mutable String pp_code; + mutable Mutex shader_rid_mutex; + Mode mode = MODE_SPATIAL; HashSet> include_dependencies; String code; @@ -60,6 +63,7 @@ class Shader : public Resource { HashMap>> default_textures; + void _check_shader_rid() const; void _dependency_changed(); void _recompile(); virtual void _update_shader() const; //used for visual shader diff --git a/servers/rendering/dummy/rasterizer_canvas_dummy.h b/servers/rendering/dummy/rasterizer_canvas_dummy.h index a450b2a21d2e..d61ee1bdb6e3 100644 --- a/servers/rendering/dummy/rasterizer_canvas_dummy.h +++ b/servers/rendering/dummy/rasterizer_canvas_dummy.h @@ -56,6 +56,7 @@ 
class RasterizerCanvasDummy : public RendererCanvasRender { void update() override {} virtual void set_debug_redraw(bool p_enabled, double p_time, const Color &p_color) override {} + virtual uint32_t get_pipeline_compilations(RS::PipelineSource p_source) override { return 0; } RasterizerCanvasDummy() {} ~RasterizerCanvasDummy() {} diff --git a/servers/rendering/dummy/rasterizer_scene_dummy.h b/servers/rendering/dummy/rasterizer_scene_dummy.h index a699a58b1fb4..f129c8674604 100644 --- a/servers/rendering/dummy/rasterizer_scene_dummy.h +++ b/servers/rendering/dummy/rasterizer_scene_dummy.h @@ -94,6 +94,11 @@ class RasterizerSceneDummy : public RendererSceneRender { uint32_t geometry_instance_get_pair_mask() override { return 0; } + /* PIPELINES */ + + virtual void mesh_generate_pipelines(RID p_mesh, bool p_background_compilation) override {} + virtual uint32_t get_pipeline_compilations(RS::PipelineSource p_source) override { return 0; } + /* SDFGI UPDATE */ void sdfgi_update(const Ref &p_render_buffers, RID p_environment, const Vector3 &p_world_position) override {} diff --git a/servers/rendering/dummy/storage/material_storage.cpp b/servers/rendering/dummy/storage/material_storage.cpp index e8b553ca7636..66927e96095e 100644 --- a/servers/rendering/dummy/storage/material_storage.cpp +++ b/servers/rendering/dummy/storage/material_storage.cpp @@ -44,6 +44,10 @@ MaterialStorage::~MaterialStorage() { singleton = nullptr; } +bool MaterialStorage::can_create_resources_async() const { + return false; +} + RID MaterialStorage::shader_allocate() { return shader_owner.allocate_rid(); } diff --git a/servers/rendering/dummy/storage/material_storage.h b/servers/rendering/dummy/storage/material_storage.h index e4c58474e25c..9385058b85fb 100644 --- a/servers/rendering/dummy/storage/material_storage.h +++ b/servers/rendering/dummy/storage/material_storage.h @@ -56,6 +56,8 @@ class MaterialStorage : public RendererMaterialStorage { MaterialStorage(); ~MaterialStorage(); + virtual 
bool can_create_resources_async() const override; + /* GLOBAL SHADER UNIFORM API */ virtual void global_shader_parameter_add(const StringName &p_name, RS::GlobalShaderParameterType p_type, const Variant &p_value) override {} diff --git a/servers/rendering/renderer_canvas_render.h b/servers/rendering/renderer_canvas_render.h index c57abee1659f..328fe32ea6c4 100644 --- a/servers/rendering/renderer_canvas_render.h +++ b/servers/rendering/renderer_canvas_render.h @@ -545,6 +545,7 @@ class RendererCanvasRender { virtual void update() = 0; virtual void set_debug_redraw(bool p_enabled, double p_time, const Color &p_color) = 0; + virtual uint32_t get_pipeline_compilations(RS::PipelineSource p_source) = 0; RendererCanvasRender() { ERR_FAIL_COND_MSG(singleton != nullptr, "A RendererCanvasRender singleton already exists."); diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp index aca85ce49720..d9d49feef199 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp @@ -31,6 +31,7 @@ #include "render_forward_clustered.h" #include "core/config/project_settings.h" #include "core/object/worker_thread_pool.h" +#include "scene/resources/material.h" #include "servers/rendering/renderer_rd/framebuffer_cache_rd.h" #include "servers/rendering/renderer_rd/renderer_compositor_rd.h" #include "servers/rendering/renderer_rd/storage_rd/light_storage.h" @@ -43,23 +44,18 @@ using namespace RendererSceneRenderImplementation; +#define PRELOAD_PIPELINES_ON_SURFACE_CACHE_CONSTRUCTION 1 + +#define FADE_ALPHA_PASS_THRESHOLD 0.999 + void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_specular() { ERR_FAIL_NULL(render_buffers); if (!render_buffers->has_texture(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_SPECULAR)) { - RD::DataFormat format = 
RD::DATA_FORMAT_R16G16B16A16_SFLOAT; - uint32_t usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - if (render_buffers->get_msaa_3d() != RS::VIEWPORT_MSAA_DISABLED) { - usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; - } else { - usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; - } - - render_buffers->create_texture(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_SPECULAR, format, usage_bits); - - if (render_buffers->get_msaa_3d() != RS::VIEWPORT_MSAA_DISABLED) { - usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; - render_buffers->create_texture(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_SPECULAR_MSAA, format, usage_bits, render_buffers->get_texture_samples()); + bool msaa = render_buffers->get_msaa_3d() != RS::VIEWPORT_MSAA_DISABLED; + render_buffers->create_texture(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_SPECULAR, get_specular_format(), get_specular_usage_bits(msaa, false, render_buffers->get_can_be_storage())); + if (msaa) { + render_buffers->create_texture(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_SPECULAR_MSAA, get_specular_format(), get_specular_usage_bits(false, msaa, render_buffers->get_can_be_storage()), render_buffers->get_texture_samples()); } } } @@ -68,20 +64,10 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_normal_rou ERR_FAIL_NULL(render_buffers); if (!render_buffers->has_texture(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_NORMAL_ROUGHNESS)) { - RD::DataFormat format = RD::DATA_FORMAT_R8G8B8A8_UNORM; - uint32_t usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - - if (render_buffers->get_msaa_3d() != RS::VIEWPORT_MSAA_DISABLED) { - usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; - } else { - usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; - } - - render_buffers->create_texture(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_NORMAL_ROUGHNESS, format, usage_bits); - - if (render_buffers->get_msaa_3d() != RS::VIEWPORT_MSAA_DISABLED) { - usage_bits = 
RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; - render_buffers->create_texture(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_NORMAL_ROUGHNESS_MSAA, format, usage_bits, render_buffers->get_texture_samples()); + bool msaa = render_buffers->get_msaa_3d() != RS::VIEWPORT_MSAA_DISABLED; + render_buffers->create_texture(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_NORMAL_ROUGHNESS, get_normal_roughness_format(), get_normal_roughness_usage_bits(msaa, false, render_buffers->get_can_be_storage())); + if (msaa) { + render_buffers->create_texture(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_NORMAL_ROUGHNESS_MSAA, get_normal_roughness_format(), get_normal_roughness_usage_bits(false, msaa, render_buffers->get_can_be_storage()), render_buffers->get_texture_samples()); } } } @@ -90,17 +76,10 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_voxelgi() ERR_FAIL_NULL(render_buffers); if (!render_buffers->has_texture(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_VOXEL_GI)) { - RD::DataFormat format = RD::DATA_FORMAT_R8G8_UINT; - uint32_t usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - if (render_buffers->get_msaa_3d() == RS::VIEWPORT_MSAA_DISABLED) { - usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; - } - - render_buffers->create_texture(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_VOXEL_GI, format, usage_bits); - - if (render_buffers->get_msaa_3d() != RS::VIEWPORT_MSAA_DISABLED) { - usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; - render_buffers->create_texture(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_VOXEL_GI_MSAA, format, usage_bits, render_buffers->get_texture_samples()); + bool msaa = render_buffers->get_msaa_3d() != RS::VIEWPORT_MSAA_DISABLED; + render_buffers->create_texture(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_VOXEL_GI, get_voxelgi_format(), get_voxelgi_usage_bits(msaa, false, render_buffers->get_can_be_storage())); + if (msaa) { + 
render_buffers->create_texture(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_VOXEL_GI_MSAA, get_voxelgi_format(), get_voxelgi_usage_bits(false, msaa, render_buffers->get_can_be_storage()), render_buffers->get_texture_samples()); } } } @@ -249,6 +228,30 @@ RID RenderForwardClustered::RenderBufferDataForwardClustered::get_velocity_only_ return FramebufferCacheRD::get_singleton()->get_cache_multiview(render_buffers->get_view_count(), velocity); } +RD::DataFormat RenderForwardClustered::RenderBufferDataForwardClustered::get_specular_format() { + return RD::DATA_FORMAT_R16G16B16A16_SFLOAT; +} + +uint32_t RenderForwardClustered::RenderBufferDataForwardClustered::get_specular_usage_bits(bool p_resolve, bool p_msaa, bool p_storage) { + return RenderSceneBuffersRD::get_color_usage_bits(p_resolve, p_msaa, p_storage); +} + +RD::DataFormat RenderForwardClustered::RenderBufferDataForwardClustered::get_normal_roughness_format() { + return RD::DATA_FORMAT_R8G8B8A8_UNORM; +} + +uint32_t RenderForwardClustered::RenderBufferDataForwardClustered::get_normal_roughness_usage_bits(bool p_resolve, bool p_msaa, bool p_storage) { + return RenderSceneBuffersRD::get_color_usage_bits(p_resolve, p_msaa, p_storage); +} + +RD::DataFormat RenderForwardClustered::RenderBufferDataForwardClustered::get_voxelgi_format() { + return RD::DATA_FORMAT_R8G8_UINT; +} + +uint32_t RenderForwardClustered::RenderBufferDataForwardClustered::get_voxelgi_usage_bits(bool p_resolve, bool p_msaa, bool p_storage) { + return RenderSceneBuffersRD::get_color_usage_bits(p_resolve, p_msaa, p_storage); +} + void RenderForwardClustered::setup_render_buffer_data(Ref p_render_buffers) { Ref data; data.instantiate(); @@ -266,6 +269,12 @@ bool RenderForwardClustered::free(RID p_rid) { return false; } +void RenderForwardClustered::update() { + RendererSceneRenderRD::update(); + _update_global_pipeline_data_requirements_from_project(); + _update_global_pipeline_data_requirements_from_light_storage(); +} + /// RENDERING /// template @@ -284,9 
+293,14 @@ void RenderForwardClustered::_render_list_template(RenderingDevice::DrawListID p RID prev_vertex_array_rd; RID prev_index_array_rd; - RID prev_pipeline_rd; RID prev_xforms_uniform_set; + SceneShaderForwardClustered::ShaderData *shader = nullptr; + SceneShaderForwardClustered::ShaderData *prev_shader = nullptr; + SceneShaderForwardClustered::ShaderData::PipelineKey pipeline_key; + uint32_t pipeline_hash = 0; + uint32_t prev_pipeline_hash = 0; + bool shadow_pass = (p_pass_mode == PASS_MODE_SHADOW) || (p_pass_mode == PASS_MODE_SHADOW_DP); SceneState::PushConstant push_constant; @@ -317,7 +331,6 @@ void RenderForwardClustered::_render_list_template(RenderingDevice::DrawListID p push_constant.base_index = i + p_params->element_offset; RID material_uniform_set; - SceneShaderForwardClustered::ShaderData *shader; void *mesh_surface; if (shadow_pass || p_pass_mode == PASS_MODE_DEPTH) { //regular depth pass can use these too @@ -356,113 +369,146 @@ void RenderForwardClustered::_render_list_template(RenderingDevice::DrawListID p should_request_redraw = true; } - //find cull variant - SceneShaderForwardClustered::ShaderData::CullVariant cull_variant; - - if (p_pass_mode == PASS_MODE_DEPTH_MATERIAL || p_pass_mode == PASS_MODE_SDF || ((p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_SHADOW_DP) && surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_DOUBLE_SIDED_SHADOWS)) { + // Determine the cull variant. 
+ SceneShaderForwardClustered::ShaderData::CullVariant cull_variant = SceneShaderForwardClustered::ShaderData::CULL_VARIANT_MAX; + if constexpr (p_pass_mode == PASS_MODE_DEPTH_MATERIAL || p_pass_mode == PASS_MODE_SDF) { cull_variant = SceneShaderForwardClustered::ShaderData::CULL_VARIANT_DOUBLE_SIDED; } else { - bool mirror = surf->owner->mirror; - if (p_params->reverse_cull) { - mirror = !mirror; + if constexpr (p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_SHADOW_DP) { + if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_DOUBLE_SIDED_SHADOWS) { + cull_variant = SceneShaderForwardClustered::ShaderData::CULL_VARIANT_DOUBLE_SIDED; + } + } + + if (cull_variant == SceneShaderForwardClustered::ShaderData::CULL_VARIANT_MAX) { + bool mirror = surf->owner->mirror; + if (p_params->reverse_cull) { + mirror = !mirror; + } + + cull_variant = mirror ? SceneShaderForwardClustered::ShaderData::CULL_VARIANT_REVERSED : SceneShaderForwardClustered::ShaderData::CULL_VARIANT_NORMAL; } - cull_variant = mirror ? SceneShaderForwardClustered::ShaderData::CULL_VARIANT_REVERSED : SceneShaderForwardClustered::ShaderData::CULL_VARIANT_NORMAL; } - RS::PrimitiveType primitive = surf->primitive; + pipeline_key.primitive_type = surf->primitive; + RID xforms_uniform_set = surf->owner->transforms_uniform_set; - SceneShaderForwardClustered::PipelineVersion pipeline_version = SceneShaderForwardClustered::PIPELINE_VERSION_MAX; // Assigned to silence wrong -Wmaybe-initialized. 
- uint32_t pipeline_color_pass_flags = 0; - uint32_t pipeline_specialization = p_params->spec_constant_base_flags; + SceneShaderForwardClustered::ShaderSpecialization pipeline_specialization = p_params->base_specialization; if constexpr (p_pass_mode == PASS_MODE_COLOR) { - if (element_info.uses_softshadow) { - pipeline_specialization |= SceneShaderForwardClustered::SHADER_SPECIALIZATION_SOFT_SHADOWS; - } - if (element_info.uses_projector) { - pipeline_specialization |= SceneShaderForwardClustered::SHADER_SPECIALIZATION_PROJECTOR; - } - - if (p_params->use_directional_soft_shadow) { - pipeline_specialization |= SceneShaderForwardClustered::SHADER_SPECIALIZATION_DIRECTIONAL_SOFT_SHADOWS; - } + pipeline_specialization.use_light_soft_shadows = element_info.uses_softshadow; + pipeline_specialization.use_light_projector = element_info.uses_projector; + pipeline_specialization.use_directional_soft_shadows = p_params->use_directional_soft_shadow; } + pipeline_key.color_pass_flags = 0; + switch (p_pass_mode) { case PASS_MODE_COLOR: { if (element_info.uses_lightmap) { - pipeline_color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_LIGHTMAP; + pipeline_key.color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_LIGHTMAP; } else { - if (element_info.uses_forward_gi) { - pipeline_specialization |= SceneShaderForwardClustered::SHADER_SPECIALIZATION_FORWARD_GI; - } + pipeline_specialization.use_forward_gi = element_info.uses_forward_gi; } if constexpr ((p_color_pass_flags & COLOR_PASS_FLAG_SEPARATE_SPECULAR) != 0) { - pipeline_color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR; + pipeline_key.color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR; } if constexpr ((p_color_pass_flags & COLOR_PASS_FLAG_MOTION_VECTORS) != 0) { - pipeline_color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS; + pipeline_key.color_pass_flags |= 
SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS; } if constexpr ((p_color_pass_flags & COLOR_PASS_FLAG_TRANSPARENT) != 0) { - pipeline_color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_TRANSPARENT; + pipeline_key.color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_TRANSPARENT; } if constexpr ((p_color_pass_flags & COLOR_PASS_FLAG_MULTIVIEW) != 0) { - pipeline_color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_MULTIVIEW; + pipeline_key.color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_MULTIVIEW; } - pipeline_version = SceneShaderForwardClustered::PIPELINE_VERSION_COLOR_PASS; + pipeline_key.version = SceneShaderForwardClustered::PIPELINE_VERSION_COLOR_PASS; } break; case PASS_MODE_SHADOW: case PASS_MODE_DEPTH: { - pipeline_version = p_params->view_count > 1 ? SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_MULTIVIEW : SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS; + pipeline_key.version = p_params->view_count > 1 ? SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_MULTIVIEW : SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS; } break; case PASS_MODE_SHADOW_DP: { ERR_FAIL_COND_MSG(p_params->view_count > 1, "Multiview not supported for shadow DP pass"); - pipeline_version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_DP; + pipeline_key.version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_DP; } break; case PASS_MODE_DEPTH_NORMAL_ROUGHNESS: { - pipeline_version = p_params->view_count > 1 ? SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW : SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS; + pipeline_key.version = p_params->view_count > 1 ? 
SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW : SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS; } break; case PASS_MODE_DEPTH_NORMAL_ROUGHNESS_VOXEL_GI: { - pipeline_version = p_params->view_count > 1 ? SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW : SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI; + pipeline_key.version = p_params->view_count > 1 ? SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW : SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI; } break; case PASS_MODE_DEPTH_MATERIAL: { ERR_FAIL_COND_MSG(p_params->view_count > 1, "Multiview not supported for material pass"); - pipeline_version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_MATERIAL; + pipeline_key.version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_MATERIAL; } break; case PASS_MODE_SDF: { // Note, SDF is prepared in world space, this shouldn't be a multiview buffer even when stereoscopic rendering is used. 
ERR_FAIL_COND_MSG(p_params->view_count > 1, "Multiview not supported for SDF pass"); - pipeline_version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_SDF; + pipeline_key.version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_SDF; } break; } - PipelineCacheRD *pipeline = nullptr; + pipeline_key.framebuffer_format_id = framebuffer_format; + pipeline_key.wireframe = p_params->force_wireframe; + pipeline_key.ubershader = 0; - if constexpr (p_pass_mode == PASS_MODE_COLOR) { - pipeline = &shader->color_pipelines[cull_variant][primitive][pipeline_color_pass_flags]; - } else { - pipeline = &shader->pipelines[cull_variant][primitive][pipeline_version]; - } - - RD::VertexFormatID vertex_format = -1; + const RD::PolygonCullMode cull_mode = shader->get_cull_mode_from_cull_variant(cull_variant); RID vertex_array_rd; RID index_array_rd; + RID pipeline_rd; + uint32_t ubershader_iterations = 2; + if constexpr (p_pass_mode == PASS_MODE_DEPTH_MATERIAL || p_pass_mode == PASS_MODE_SDF) { + ubershader_iterations = 1; + } + + while (pipeline_key.ubershader < ubershader_iterations) { + // Skeleton and blend shape. 
+ RD::VertexFormatID vertex_format = -1; + bool pipeline_motion_vectors = pipeline_key.color_pass_flags & SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS; + uint64_t input_mask = shader->get_vertex_input_mask(pipeline_key.version, pipeline_key.color_pass_flags, pipeline_key.ubershader); + if (surf->owner->mesh_instance.is_valid()) { + mesh_storage->mesh_instance_surface_get_vertex_arrays_and_format(surf->owner->mesh_instance, surf->surface_index, input_mask, pipeline_motion_vectors, vertex_array_rd, vertex_format); + } else { + mesh_storage->mesh_surface_get_vertex_arrays_and_format(mesh_surface, input_mask, pipeline_motion_vectors, vertex_array_rd, vertex_format); + } - //skeleton and blend shape - bool pipeline_motion_vectors = pipeline_color_pass_flags & SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS; - if (surf->owner->mesh_instance.is_valid()) { - mesh_storage->mesh_instance_surface_get_vertex_arrays_and_format(surf->owner->mesh_instance, surf->surface_index, pipeline->get_vertex_input_mask(), pipeline_motion_vectors, vertex_array_rd, vertex_format); - } else { - mesh_storage->mesh_surface_get_vertex_arrays_and_format(mesh_surface, pipeline->get_vertex_input_mask(), pipeline_motion_vectors, vertex_array_rd, vertex_format); - } + index_array_rd = mesh_storage->mesh_surface_get_index_array(mesh_surface, element_info.lod_index); + pipeline_key.vertex_format_id = vertex_format; + + if (pipeline_key.ubershader) { + pipeline_key.shader_specialization = {}; + pipeline_key.cull_mode = RD::POLYGON_CULL_DISABLED; + } else { + pipeline_key.shader_specialization = pipeline_specialization; + pipeline_key.cull_mode = cull_mode; + } + + pipeline_hash = pipeline_key.hash(); - index_array_rd = mesh_storage->mesh_surface_get_index_array(mesh_surface, element_info.lod_index); + if (shader != prev_shader || pipeline_hash != prev_pipeline_hash) { + bool wait_for_compilation = (ubershader_iterations == 1) || pipeline_key.ubershader; + 
RS::PipelineSource pipeline_source = wait_for_compilation ? RS::PIPELINE_SOURCE_DRAW : RS::PIPELINE_SOURCE_SPECIALIZATION; + pipeline_rd = shader->pipeline_hash_map.get_pipeline(pipeline_key, pipeline_hash, wait_for_compilation, pipeline_source); + + if (pipeline_rd.is_valid()) { + prev_shader = shader; + prev_pipeline_hash = pipeline_hash; + break; + } else { + pipeline_key.ubershader++; + } + } else { + // The same pipeline is bound already. + break; + } + } if (prev_vertex_array_rd != vertex_array_rd) { RD::get_singleton()->draw_list_bind_vertex_array(draw_list, vertex_array_rd); @@ -476,13 +522,8 @@ void RenderForwardClustered::_render_list_template(RenderingDevice::DrawListID p prev_index_array_rd = index_array_rd; } - RID pipeline_rd = pipeline->get_render_pipeline(vertex_format, framebuffer_format, p_params->force_wireframe, 0, pipeline_specialization); - - if (pipeline_rd != prev_pipeline_rd) { - // checking with prev shader does not make so much sense, as - // the pipeline may still be different. 
+ if (!pipeline_rd.is_null()) { RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, pipeline_rd); - prev_pipeline_rd = pipeline_rd; } if (xforms_uniform_set.is_valid() && prev_xforms_uniform_set != xforms_uniform_set) { @@ -508,7 +549,17 @@ void RenderForwardClustered::_render_list_template(RenderingDevice::DrawListID p push_constant.multimesh_motion_vectors_previous_offset = 0; } - RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(SceneState::PushConstant)); + size_t push_constant_size = 0; + if (pipeline_key.ubershader) { + push_constant_size = sizeof(SceneState::PushConstant); + push_constant.ubershader.specialization = pipeline_specialization; + push_constant.ubershader.constants = {}; + push_constant.ubershader.constants.cull_mode = cull_mode; + } else { + push_constant_size = sizeof(SceneState::PushConstant) - sizeof(SceneState::PushConstantUbershader); + } + + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, push_constant_size); uint32_t instance_count = surf->owner->instance_count > 1 ? surf->owner->instance_count : element_info.repeat; if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_PARTICLE_TRAILS) { @@ -575,6 +626,9 @@ void RenderForwardClustered::_render_list(RenderingDevice::DrawListID p_draw_lis case PASS_MODE_SDF: { _render_list_template(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element); } break; + default: { + // Unknown pass mode. 
+ } break; } } @@ -1013,7 +1067,7 @@ void RenderForwardClustered::_fill_render_list(RenderListType p_render_list, con bool force_alpha = false; #endif - if (fade_alpha < 0.999) { + if (fade_alpha < FADE_ALPHA_PASS_THRESHOLD) { force_alpha = true; } @@ -1691,12 +1745,18 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co reverse_cull = true; // for some reason our views are inverted samplers = RendererRD::MaterialStorage::get_singleton()->samplers_rd_get_default(); + + // Indicate pipelines for reflection probes are required. + global_pipeline_data_required.use_reflection_probes = true; } else { screen_size = rb->get_internal_size(); if (p_render_data->scene_data->calculate_motion_vectors) { color_pass_flags |= COLOR_PASS_FLAG_MOTION_VECTORS; scene_shader.enable_advanced_shader_group(); + + // Indicate pipelines for motion vectors are required. + global_pipeline_data_required.use_motion_vectors = true; } if (p_render_data->voxel_gi_instances->size() > 0) { @@ -1718,6 +1778,9 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co color_pass_flags |= COLOR_PASS_FLAG_MULTIVIEW; // Try enabling here in case is_xr_enabled() returns false. scene_shader.shader.enable_group(SceneShaderForwardClustered::SHADER_GROUP_MULTIVIEW); + + // Indicate pipelines for multiview are required. + global_pipeline_data_required.use_multiview = true; } color_framebuffer = rb_data->get_color_pass_fb(color_pass_flags); @@ -1736,18 +1799,6 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co // May have changed due to the above (light buffer enlarged, as an example). 
_update_render_base_uniform_set(); - _fill_render_list(RENDER_LIST_OPAQUE, p_render_data, PASS_MODE_COLOR, using_sdfgi, using_sdfgi || using_voxelgi, using_motion_pass); - render_list[RENDER_LIST_OPAQUE].sort_by_key(); - render_list[RENDER_LIST_MOTION].sort_by_key(); - render_list[RENDER_LIST_ALPHA].sort_by_reverse_depth_and_priority(); - - int *render_info = p_render_data->render_info ? p_render_data->render_info->info[RS::VIEWPORT_RENDER_INFO_TYPE_VISIBLE] : (int *)nullptr; - _fill_instance_data(RENDER_LIST_OPAQUE, render_info); - _fill_instance_data(RENDER_LIST_MOTION, render_info); - _fill_instance_data(RENDER_LIST_ALPHA, render_info); - - RD::get_singleton()->draw_command_end_label(); - if (!is_reflection_probe) { if (using_voxelgi) { depth_pass_mode = PASS_MODE_DEPTH_NORMAL_ROUGHNESS_VOXEL_GI; @@ -1795,6 +1846,35 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co scene_shader.enable_advanced_shader_group(p_render_data->scene_data->view_count > 1); } + // Update the global pipeline requirements with all the features found to be in use in this scene. + if (depth_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS) { + global_pipeline_data_required.use_normal_and_roughness = true; + } + + if (scene_state.used_lightmap) { + global_pipeline_data_required.use_lightmaps = true; + } + + if (using_voxelgi) { + global_pipeline_data_required.use_voxelgi = true; + } + + if (using_separate_specular) { + global_pipeline_data_required.use_separate_specular = true; + } + + _fill_render_list(RENDER_LIST_OPAQUE, p_render_data, PASS_MODE_COLOR, using_sdfgi, using_sdfgi || using_voxelgi, using_motion_pass); + render_list[RENDER_LIST_OPAQUE].sort_by_key(); + render_list[RENDER_LIST_MOTION].sort_by_key(); + render_list[RENDER_LIST_ALPHA].sort_by_reverse_depth_and_priority(); + + int *render_info = p_render_data->render_info ? 
p_render_data->render_info->info[RS::VIEWPORT_RENDER_INFO_TYPE_VISIBLE] : (int *)nullptr; + _fill_instance_data(RENDER_LIST_OPAQUE, render_info); + _fill_instance_data(RENDER_LIST_MOTION, render_info); + _fill_instance_data(RENDER_LIST_ALPHA, render_info); + + RD::get_singleton()->draw_command_end_label(); + RID radiance_texture; bool draw_sky = false; bool draw_sky_fog_only = false; @@ -1911,12 +1991,8 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co bool debug_sdfgi_probes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_SDFGI_PROBES; bool depth_pre_pass = bool(GLOBAL_GET("rendering/driver/depth_prepass/enable")) && depth_framebuffer.is_valid(); - uint32_t spec_constant_base_flags = 0; - { - if (p_render_data->environment.is_valid() && environment_get_fog_mode(p_render_data->environment) == RS::EnvironmentFogMode::ENV_FOG_MODE_DEPTH) { - spec_constant_base_flags |= 1 << SPEC_CONSTANT_USE_DEPTH_FOG; - } - } + SceneShaderForwardClustered::ShaderSpecialization base_specialization = scene_shader.default_specialization; + base_specialization.use_depth_fog = p_render_data->environment.is_valid() && environment_get_fog_mode(p_render_data->environment) == RS::EnvironmentFogMode::ENV_FOG_MODE_DEPTH; bool using_ssao = depth_pre_pass && !is_reflection_probe && p_render_data->environment.is_valid() && environment_get_ssao_enabled(p_render_data->environment); @@ -1940,7 +2016,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_OPAQUE, nullptr, RID(), samplers); bool finish_depth = using_ssao || using_ssil || using_sdfgi || using_voxelgi || ce_pre_opaque_resolved_depth || ce_post_opaque_resolved_depth; - RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), reverse_cull, depth_pass_mode, 0, rb_data.is_null(), 
p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count, 0, spec_constant_base_flags); + RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), reverse_cull, depth_pass_mode, 0, rb_data.is_null(), p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count, 0, base_specialization); _render_list_with_draw_list(&render_list_params, depth_framebuffer, needs_pre_resolve ? RD::INITIAL_ACTION_LOAD : RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, needs_pre_resolve ? RD::INITIAL_ACTION_LOAD : RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, needs_pre_resolve ? Vector() : depth_pass_clear); RD::get_singleton()->draw_command_end_label(); @@ -2019,7 +2095,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co uint32_t opaque_color_pass_flags = using_motion_pass ? (color_pass_flags & ~COLOR_PASS_FLAG_MOTION_VECTORS) : color_pass_flags; RID opaque_framebuffer = using_motion_pass ? 
rb_data->get_color_pass_fb(opaque_color_pass_flags) : color_framebuffer; - RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), reverse_cull, PASS_MODE_COLOR, opaque_color_pass_flags, rb_data.is_null(), p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count, 0, spec_constant_base_flags); + RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), reverse_cull, PASS_MODE_COLOR, opaque_color_pass_flags, rb_data.is_null(), p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count, 0, base_specialization); _render_list_with_draw_list(&render_list_params, opaque_framebuffer, load_color ? RD::INITIAL_ACTION_LOAD : RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, depth_pre_pass ? 
RD::INITIAL_ACTION_LOAD : RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, c, 0.0, 0); } @@ -2039,7 +2115,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_MOTION, p_render_data, radiance_texture, samplers, true); - RenderListParameters render_list_params(render_list[RENDER_LIST_MOTION].elements.ptr(), render_list[RENDER_LIST_MOTION].element_info.ptr(), render_list[RENDER_LIST_MOTION].elements.size(), reverse_cull, PASS_MODE_COLOR, color_pass_flags, rb_data.is_null(), p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count, 0, spec_constant_base_flags); + RenderListParameters render_list_params(render_list[RENDER_LIST_MOTION].elements.ptr(), render_list[RENDER_LIST_MOTION].element_info.ptr(), render_list[RENDER_LIST_MOTION].elements.size(), reverse_cull, PASS_MODE_COLOR, color_pass_flags, rb_data.is_null(), p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count, 0, base_specialization); _render_list_with_draw_list(&render_list_params, color_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE); RD::get_singleton()->draw_command_end_label(); @@ -2218,7 +2294,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co } RID alpha_framebuffer = rb_data.is_valid() ? 
rb_data->get_color_pass_fb(transparent_color_pass_flags) : color_only_framebuffer; - RenderListParameters render_list_params(render_list[RENDER_LIST_ALPHA].elements.ptr(), render_list[RENDER_LIST_ALPHA].element_info.ptr(), render_list[RENDER_LIST_ALPHA].elements.size(), reverse_cull, PASS_MODE_COLOR, transparent_color_pass_flags, rb_data.is_null(), p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count, 0, spec_constant_base_flags); + RenderListParameters render_list_params(render_list[RENDER_LIST_ALPHA].elements.ptr(), render_list[RENDER_LIST_ALPHA].element_info.ptr(), render_list[RENDER_LIST_ALPHA].elements.size(), reverse_cull, PASS_MODE_COLOR, transparent_color_pass_flags, rb_data.is_null(), p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count, 0, base_specialization); _render_list_with_draw_list(&render_list_params, alpha_framebuffer, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE); } @@ -2841,6 +2917,9 @@ void RenderForwardClustered::_render_sdfgi(Ref p_render_bu _update_render_base_uniform_set(); + // Indicate pipelines for SDFGI are required. 
+ global_pipeline_data_required.use_sdfgi = true; + PassMode pass_mode = PASS_MODE_SDF; _fill_render_list(RENDER_LIST_SECONDARY, &render_data, pass_mode); render_list[RENDER_LIST_SECONDARY].sort_by_key(); @@ -3734,14 +3813,23 @@ void RenderForwardClustered::GeometryInstanceForwardClustered::_mark_dirty() { RenderForwardClustered::get_singleton()->geometry_instance_dirty_list.add(&dirty_list_element); } -void RenderForwardClustered::_geometry_instance_add_surface_with_material(GeometryInstanceForwardClustered *ginstance, uint32_t p_surface, SceneShaderForwardClustered::MaterialData *p_material, uint32_t p_material_id, uint32_t p_shader_id, RID p_mesh) { - RendererRD::MeshStorage *mesh_storage = RendererRD::MeshStorage::get_singleton(); +void RenderForwardClustered::_update_global_pipeline_data_requirements_from_project() { /* Refreshes the project-setting-driven part of global_pipeline_data_required: 16-bit shadow pipelines are required when either the directional or the positional shadow setting is 16-bit, 32-bit ones when either of them is not, and texture_samples follows the 3D MSAA setting. */+ const int msaa_3d_mode = GLOBAL_GET("rendering/anti_aliasing/quality/msaa_3d"); + const bool directional_shadow_16_bits = GLOBAL_GET("rendering/lights_and_shadows/directional_shadow/16_bits"); + const bool positional_shadow_16_bits = GLOBAL_GET("rendering/lights_and_shadows/positional_shadow/atlas_16_bits"); + global_pipeline_data_required.use_16_bit_shadows = directional_shadow_16_bits || positional_shadow_16_bits; + global_pipeline_data_required.use_32_bit_shadows = !directional_shadow_16_bits || !positional_shadow_16_bits; + global_pipeline_data_required.texture_samples = RenderSceneBuffersRD::msaa_to_samples(RS::ViewportMSAA(msaa_3d_mode)); +} - bool has_read_screen_alpha = p_material->shader_data->uses_screen_texture || p_material->shader_data->uses_depth_texture || p_material->shader_data->uses_normal_texture; - bool has_base_alpha = (p_material->shader_data->uses_alpha && (!p_material->shader_data->uses_alpha_clip || p_material->shader_data->uses_alpha_antialiasing)) || has_read_screen_alpha; - bool has_blend_alpha = p_material->shader_data->uses_blend_alpha; - bool has_alpha = has_base_alpha || has_blend_alpha; +void
RenderForwardClustered::_update_global_pipeline_data_requirements_from_light_storage() { /* Copies the shadow cubemap and dual paraboloid usage flags reported by the light storage into global_pipeline_data_required. */+ RendererRD::LightStorage *light_storage = RendererRD::LightStorage::get_singleton(); + global_pipeline_data_required.use_shadow_cubemaps = light_storage->get_shadow_cubemaps_used(); + global_pipeline_data_required.use_shadow_dual_paraboloid = light_storage->get_shadow_dual_paraboloid_used(); +} +void RenderForwardClustered::_geometry_instance_add_surface_with_material(GeometryInstanceForwardClustered *ginstance, uint32_t p_surface, SceneShaderForwardClustered::MaterialData *p_material, uint32_t p_material_id, uint32_t p_shader_id, RID p_mesh) { + RendererRD::MeshStorage *mesh_storage = RendererRD::MeshStorage::get_singleton(); uint32_t flags = 0; if (p_material->shader_data->uses_sss) { @@ -3764,10 +3852,9 @@ void RenderForwardClustered::_geometry_instance_add_surface_with_material(Geomet flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_DOUBLE_SIDED_SHADOWS; } - if (has_alpha || has_read_screen_alpha || p_material->shader_data->depth_draw == SceneShaderForwardClustered::ShaderData::DEPTH_DRAW_DISABLED || p_material->shader_data->depth_test == SceneShaderForwardClustered::ShaderData::DEPTH_TEST_DISABLED) { - //material is only meant for alpha pass + if (p_material->shader_data->uses_alpha_pass()) { flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_ALPHA; - if ((p_material->shader_data->uses_depth_prepass_alpha || p_material->shader_data->uses_alpha_antialiasing) && !(p_material->shader_data->depth_draw == SceneShaderForwardClustered::ShaderData::DEPTH_DRAW_DISABLED || p_material->shader_data->depth_test == SceneShaderForwardClustered::ShaderData::DEPTH_TEST_DISABLED)) { + if (p_material->shader_data->uses_depth_in_alpha_pass()) { flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH; flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW; } @@ -3787,16 +3874,14 @@ void RenderForwardClustered::_geometry_instance_add_surface_with_material(Geomet
SceneShaderForwardClustered::MaterialData *material_shadow = nullptr; void *surface_shadow = nullptr; - if (!p_material->shader_data->uses_particle_trails && !p_material->shader_data->writes_modelview_or_projection && !p_material->shader_data->uses_vertex && !p_material->shader_data->uses_position && !p_material->shader_data->uses_discard && !p_material->shader_data->uses_depth_prepass_alpha && !p_material->shader_data->uses_alpha_clip && !p_material->shader_data->uses_alpha_antialiasing && p_material->shader_data->cull_mode == SceneShaderForwardClustered::ShaderData::CULL_BACK && !p_material->shader_data->uses_point_size && !p_material->shader_data->uses_world_coordinates) { + if (p_material->shader_data->uses_shared_shadow_material()) { flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_SHARED_SHADOW_MATERIAL; material_shadow = static_cast(RendererRD::MaterialStorage::get_singleton()->material_get_data(scene_shader.default_material, RendererRD::MaterialStorage::SHADER_TYPE_3D)); RID shadow_mesh = mesh_storage->mesh_get_shadow_mesh(p_mesh); - if (shadow_mesh.is_valid()) { surface_shadow = mesh_storage->mesh_get_surface(shadow_mesh, p_surface); } - } else { material_shadow = p_material; } @@ -3848,6 +3933,16 @@ void RenderForwardClustered::_geometry_instance_add_surface_with_material(Geomet String mesh_path = mesh_storage->mesh_get_path(p_mesh).is_empty() ? "" : "(" + mesh_storage->mesh_get_path(p_mesh) + ")"; WARN_PRINT_ED(vformat("Attempting to use a shader %s that requires tangents with a mesh %s that doesn't contain tangents. Ensure that meshes are imported with the 'ensure_tangents' option. 
If creating your own meshes, add an `ARRAY_TANGENT` array (when using ArrayMesh) or call `generate_tangents()` (when using SurfaceTool).", shader_path, mesh_path)); } + +#if PRELOAD_PIPELINES_ON_SURFACE_CACHE_CONSTRUCTION + if (!sdcache->compilation_dirty_element.in_list()) { + geometry_surface_compilation_dirty_list.add(&sdcache->compilation_dirty_element); + } + + if (!sdcache->compilation_all_element.in_list()) { + geometry_surface_compilation_all_list.add(&sdcache->compilation_all_element); + } +#endif } void RenderForwardClustered::_geometry_instance_add_surface_with_material_chain(GeometryInstanceForwardClustered *ginstance, uint32_t p_surface, SceneShaderForwardClustered::MaterialData *p_material, RID p_mat_src, RID p_mesh) { @@ -3859,7 +3954,7 @@ void RenderForwardClustered::_geometry_instance_add_surface_with_material_chain( while (material->next_pass.is_valid()) { RID next_pass = material->next_pass; material = static_cast(material_storage->material_get_data(next_pass, RendererRD::MaterialStorage::SHADER_TYPE_3D)); - if (!material || !material->shader_data->valid) { + if (!material || !material->shader_data->is_valid()) { break; } if (ginstance->data->dirty_dependencies) { @@ -3879,7 +3974,7 @@ void RenderForwardClustered::_geometry_instance_add_surface(GeometryInstanceForw if (m_src.is_valid()) { material = static_cast(material_storage->material_get_data(m_src, RendererRD::MaterialStorage::SHADER_TYPE_3D)); - if (!material || !material->shader_data->valid) { + if (!material || !material->shader_data->is_valid()) { material = nullptr; } } @@ -3901,7 +3996,7 @@ void RenderForwardClustered::_geometry_instance_add_surface(GeometryInstanceForw m_src = ginstance->data->material_overlay; material = static_cast(material_storage->material_get_data(m_src, RendererRD::MaterialStorage::SHADER_TYPE_3D)); - if (material && material->shader_data->valid) { + if (material && material->shader_data->is_valid()) { if (ginstance->data->dirty_dependencies) { 
material_storage->material_update_dependency(m_src, &ginstance->data->dependency_tracker); } @@ -4068,10 +4163,357 @@ void RenderForwardClustered::_geometry_instance_update(RenderGeometryInstance *p ginstance->dirty_list_element.remove_from_list(); } +static RD::FramebufferFormatID _get_color_framebuffer_format_for_pipeline(RD::DataFormat p_color_format, bool p_can_be_storage, RD::TextureSamples p_samples, bool p_specular, bool p_velocity, uint32_t p_view_count) { /* Returns the framebuffer format for a color pass. Attachments are pushed in a fixed order: color, specular, velocity, depth. When p_specular or p_velocity is false, an attachment flagged UNUSED_ATTACHMENT is pushed in its place. p_view_count is forwarded to framebuffer_format_create_multipass. */+ const bool multisampling = p_samples > RD::TEXTURE_SAMPLES_1; + RD::AttachmentFormat attachment; + attachment.samples = p_samples; + + RD::AttachmentFormat unused_attachment; + unused_attachment.usage_flags = RD::AttachmentFormat::UNUSED_ATTACHMENT; + + thread_local Vector attachments; + attachments.clear(); + + // Color attachment. + attachment.format = p_color_format; + attachment.usage_flags = RenderSceneBuffersRD::get_color_usage_bits(false, multisampling, p_can_be_storage); + attachments.push_back(attachment); +/* Specular attachment, or an unused placeholder. */ + if (p_specular) { + attachment.format = RenderForwardClustered::RenderBufferDataForwardClustered::get_specular_format(); + attachment.usage_flags = RenderForwardClustered::RenderBufferDataForwardClustered::get_specular_usage_bits(false, multisampling, p_can_be_storage); + attachments.push_back(attachment); + } else { + attachments.push_back(unused_attachment); + } +/* Velocity attachment, or an unused placeholder. */ + if (p_velocity) { + attachment.format = RenderSceneBuffersRD::get_velocity_format(); + attachment.usage_flags = RenderSceneBuffersRD::get_velocity_usage_bits(false, multisampling, p_can_be_storage); + attachments.push_back(attachment); + } else { + attachments.push_back(unused_attachment); + } + + // Depth attachment.
+ attachment.format = RenderSceneBuffersRD::get_depth_format(false, multisampling, p_can_be_storage); + attachment.usage_flags = RenderSceneBuffersRD::get_depth_usage_bits(false, multisampling, p_can_be_storage); + attachments.push_back(attachment); + + thread_local Vector passes; + passes.resize(1); + passes.ptrw()[0].color_attachments.resize(attachments.size() - 1); +/* Every attachment except the last one (depth) is a color slot of the single pass; placeholders map to RD::ATTACHMENT_UNUSED. */ + int *color_attachments = passes.ptrw()[0].color_attachments.ptrw(); + for (int64_t i = 0; i < attachments.size() - 1; i++) { + color_attachments[i] = (attachments[i].usage_flags == RD::AttachmentFormat::UNUSED_ATTACHMENT) ? RD::ATTACHMENT_UNUSED : i; + } + + passes.ptrw()[0].depth_attachment = attachments.size() - 1; + + return RD::get_singleton()->framebuffer_format_create_multipass(attachments, passes, p_view_count); +} +/* Returns the framebuffer format used to render reflection probes with color: one color and one depth attachment, with formats and usage bits taken from LightStorage. */ +static RD::FramebufferFormatID _get_reflection_probe_color_framebuffer_format_for_pipeline() { + RD::AttachmentFormat attachment; + thread_local Vector attachments; + attachments.clear(); + + attachment.format = RendererRD::LightStorage::get_reflection_probe_color_format(); + attachment.usage_flags = RendererRD::LightStorage::get_reflection_probe_color_usage_bits(); + attachments.push_back(attachment); + + attachment.format = RendererRD::LightStorage::get_reflection_probe_depth_format(); + attachment.usage_flags = RendererRD::LightStorage::get_reflection_probe_depth_usage_bits(); + attachments.push_back(attachment); + + return RD::get_singleton()->framebuffer_format_create(attachments); +} +/* Returns the framebuffer format for a depth pass. Depth is attachment 0; a normal-roughness attachment and a VoxelGI attachment are appended when the matching flag is set. */ +static RD::FramebufferFormatID _get_depth_framebuffer_format_for_pipeline(bool p_can_be_storage, RD::TextureSamples p_samples, bool p_normal_roughness, bool p_voxelgi) { + const bool multisampling = p_samples > RD::TEXTURE_SAMPLES_1; + RD::AttachmentFormat attachment; + attachment.samples = p_samples; + + thread_local LocalVector attachments; + attachments.clear(); + + attachment.format = RenderSceneBuffersRD::get_depth_format(false, multisampling, p_can_be_storage); +
attachment.usage_flags = RenderSceneBuffersRD::get_depth_usage_bits(false, multisampling, p_can_be_storage); + attachments.push_back(attachment); + + if (p_normal_roughness) { + attachment.format = RenderForwardClustered::RenderBufferDataForwardClustered::get_normal_roughness_format(); + attachment.usage_flags = RenderForwardClustered::RenderBufferDataForwardClustered::get_normal_roughness_usage_bits(false, multisampling, p_can_be_storage); + attachments.push_back(attachment); + } + + if (p_voxelgi) { + attachment.format = RenderForwardClustered::RenderBufferDataForwardClustered::get_voxelgi_format(); + attachment.usage_flags = RenderForwardClustered::RenderBufferDataForwardClustered::get_voxelgi_usage_bits(false, multisampling, p_can_be_storage); + attachments.push_back(attachment); + } + + thread_local Vector passes; + passes.resize(1); + passes.ptrw()[0].color_attachments.resize(attachments.size() - 1); +/* The depth attachment was pushed first (index 0), so color slot i - 1 refers to attachment i. */ + int *color_attachments = passes.ptrw()[0].color_attachments.ptrw(); + for (int64_t i = 1; i < attachments.size(); i++) { + color_attachments[i - 1] = (attachments[i].usage_flags == RD::AttachmentFormat::UNUSED_ATTACHMENT) ?
RD::ATTACHMENT_UNUSED : i; + } + + passes.ptrw()[0].depth_attachment = 0; + + return RD::get_singleton()->framebuffer_format_create_multipass(attachments, passes); +} +/* Returns the single-attachment framebuffer format built from the cubemap depth format and usage bits reported by LightStorage. */ +static RD::FramebufferFormatID _get_shadow_cubemap_framebuffer_format_for_pipeline() { + thread_local LocalVector attachments; + attachments.clear(); + + RD::AttachmentFormat attachment; + attachment.format = RendererRD::LightStorage::get_cubemap_depth_format(); + attachment.usage_flags = RendererRD::LightStorage::get_cubemap_depth_usage_bits(); + attachments.push_back(attachment); + + return RD::get_singleton()->framebuffer_format_create(attachments); +} +/* Returns the single-attachment framebuffer format for the shadow atlas; p_use_16_bits is passed to LightStorage to select the depth format. */ +static RD::FramebufferFormatID _get_shadow_atlas_framebuffer_format_for_pipeline(bool p_use_16_bits) { + thread_local LocalVector attachments; + attachments.clear(); + + RD::AttachmentFormat attachment; + attachment.format = RendererRD::LightStorage::get_shadow_atlas_depth_format(p_use_16_bits); + attachment.usage_flags = RendererRD::LightStorage::get_shadow_atlas_depth_usage_bits(); + attachments.push_back(attachment); + + return RD::get_singleton()->framebuffer_format_create(attachments); +} +/* Returns the single-attachment framebuffer format built from the reflection probe depth format and usage bits reported by LightStorage. */ +static RD::FramebufferFormatID _get_reflection_probe_depth_framebuffer_format_for_pipeline() { + thread_local LocalVector attachments; + attachments.clear(); + + RD::AttachmentFormat attachment; + attachment.format = RendererRD::LightStorage::get_reflection_probe_depth_format(); + attachment.usage_flags = RendererRD::LightStorage::get_reflection_probe_depth_usage_bits(); + attachments.push_back(attachment); + + return RD::get_singleton()->framebuffer_format_create(attachments); +} +/* Completes r_pipeline_key for one mesh surface (vertex format and ubershader flag) and requests compilation of that pipeline from p_shader. When r_pipeline_pairs is not null, the shader and key pair is appended to it. */ +void RenderForwardClustered::_mesh_compile_pipeline_for_surface(SceneShaderForwardClustered::ShaderData *p_shader, void *p_mesh_surface, bool p_ubershader, bool p_instanced_surface, RS::PipelineSource p_source, SceneShaderForwardClustered::ShaderData::PipelineKey &r_pipeline_key, Vector *r_pipeline_pairs) { + RendererRD::MeshStorage *mesh_storage =
RendererRD::MeshStorage::get_singleton(); + uint64_t input_mask = p_shader->get_vertex_input_mask(r_pipeline_key.version, r_pipeline_key.color_pass_flags, p_ubershader); + bool pipeline_motion_vectors = r_pipeline_key.color_pass_flags & SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS; + r_pipeline_key.vertex_format_id = mesh_storage->mesh_surface_get_vertex_format(p_mesh_surface, input_mask, p_instanced_surface, pipeline_motion_vectors); + r_pipeline_key.ubershader = p_ubershader; + + p_shader->pipeline_hash_map.compile_pipeline(r_pipeline_key, r_pipeline_key.hash(), p_source); + + if (r_pipeline_pairs != nullptr) { + r_pipeline_pairs->push_back({ p_shader, r_pipeline_key }); + } +} +/* Compiles the pipeline variants a surface may need given the features flagged in p_global: the color pass combinations first, then, only when p_surface.uses_depth is set, the depth, shadow and reflection probe depth variants. r_pipeline_pairs is forwarded to every compile call. */ +void RenderForwardClustered::_mesh_compile_pipelines_for_surface(const SurfacePipelineData &p_surface, const GlobalPipelineData &p_global, RS::PipelineSource p_source, Vector *r_pipeline_pairs) { + RendererRD::MeshStorage *mesh_storage = RendererRD::MeshStorage::get_singleton(); + + // Retrieve from the scene shader which groups are currently enabled. + const bool multiview_enabled = p_global.use_multiview && scene_shader.is_multiview_shader_group_enabled(); + const RD::DataFormat buffers_color_format = _render_buffers_get_color_format(); + const bool buffers_can_be_storage = _render_buffers_can_be_storage(); + + // Set the attributes common to all pipelines. + SceneShaderForwardClustered::ShaderData::PipelineKey pipeline_key; + pipeline_key.cull_mode = RD::POLYGON_CULL_DISABLED; + pipeline_key.primitive_type = mesh_storage->mesh_surface_get_primitive(p_surface.mesh_surface); + pipeline_key.wireframe = false; + + // Work out how many variants each optional feature adds to the iteration space. + const uint32_t multiview_iterations = multiview_enabled ? 2 : 1; + const uint32_t lightmap_iterations = p_global.use_lightmaps && p_surface.can_use_lightmap ? 2 : 1; + const uint32_t alpha_iterations = p_surface.uses_transparent ?
2 : 1; + for (uint32_t multiview = 0; multiview < multiview_iterations; multiview++) { + for (uint32_t lightmap = 0; lightmap < lightmap_iterations; lightmap++) { /* alpha 0 is the opaque variant and alpha 1 the transparent one; the loop starts at 1 for surfaces that do not use the opaque pass. */+ for (uint32_t alpha = p_surface.uses_opaque ? 0 : 1; alpha < alpha_iterations; alpha++) { + // Generate all the possible variants used during the color pass. + pipeline_key.version = SceneShaderForwardClustered::PIPELINE_VERSION_COLOR_PASS; + pipeline_key.color_pass_flags = 0; + + if (lightmap) { + pipeline_key.color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_LIGHTMAP; + } + + if (alpha) { + pipeline_key.color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_TRANSPARENT; + } + + if (multiview) { + pipeline_key.color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_MULTIVIEW; + } else if (p_global.use_reflection_probes) { + // Reflection probe can't be rendered in multiview. + pipeline_key.framebuffer_format_id = _get_reflection_probe_color_framebuffer_format_for_pipeline(); + _mesh_compile_pipeline_for_surface(p_surface.shader, p_surface.mesh_surface, true, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs); + } + + // View count is assumed to be 2 as the configuration is dependent on the viewport. It's likely a safe assumption for stereo rendering. + uint32_t view_count = multiview ? 2 : 1; + pipeline_key.framebuffer_format_id = _get_color_framebuffer_format_for_pipeline(buffers_color_format, buffers_can_be_storage, RD::TextureSamples(p_global.texture_samples), false, false, view_count); + _mesh_compile_pipeline_for_surface(p_surface.shader, p_surface.mesh_surface, true, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs); + + // Generate all the possible variants used during the advanced color passes. + const uint32_t separate_specular_iterations = p_global.use_separate_specular ? 2 : 1; + const uint32_t motion_vectors_iterations = p_global.use_motion_vectors ?
2 : 1; + uint32_t base_color_pass_flags = pipeline_key.color_pass_flags; + for (uint32_t separate_specular = 0; separate_specular < separate_specular_iterations; separate_specular++) { + for (uint32_t motion_vectors = 0; motion_vectors < motion_vectors_iterations; motion_vectors++) { + if (!separate_specular && !motion_vectors) { + // This case was already generated. + continue; + } + + pipeline_key.color_pass_flags = base_color_pass_flags; + + if (separate_specular) { + pipeline_key.color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR; + } + + if (motion_vectors) { + pipeline_key.color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS; + } + + pipeline_key.framebuffer_format_id = _get_color_framebuffer_format_for_pipeline(buffers_color_format, buffers_can_be_storage, RD::TextureSamples(p_global.texture_samples), separate_specular, motion_vectors, view_count); + _mesh_compile_pipeline_for_surface(p_surface.shader, p_surface.mesh_surface, true, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs); + } + } + } + } + } + + if (!p_surface.uses_depth) { + return; + } + + // Generate the depth pipelines if the material supports depth or it must be part of the shadow pass. + pipeline_key.color_pass_flags = 0; + + if (p_global.use_normal_and_roughness) { + // A lot of different effects rely on normal and roughness being written to during the depth pass. + pipeline_key.version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS; + pipeline_key.framebuffer_format_id = _get_depth_framebuffer_format_for_pipeline(buffers_can_be_storage, RD::TextureSamples(p_global.texture_samples), true, false); + _mesh_compile_pipeline_for_surface(p_surface.shader, p_surface.mesh_surface, true, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs); + } + + if (p_global.use_voxelgi) { + // Depth pass with VoxelGI support. 
+ pipeline_key.version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI; + pipeline_key.framebuffer_format_id = _get_depth_framebuffer_format_for_pipeline(buffers_can_be_storage, RD::TextureSamples(p_global.texture_samples), true, true); + _mesh_compile_pipeline_for_surface(p_surface.shader, p_surface.mesh_surface, true, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs); + } + + if (p_global.use_sdfgi) { + // Depth pass with SDFGI support. + pipeline_key.version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_SDF; + pipeline_key.framebuffer_format_id = _get_depth_framebuffer_format_for_pipeline(buffers_can_be_storage, RD::TextureSamples(p_global.texture_samples), false, false); + _mesh_compile_pipeline_for_surface(p_surface.shader, p_surface.mesh_surface, false, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs); + + // Depth pass with SDFGI support for an empty framebuffer. + pipeline_key.framebuffer_format_id = RD::get_singleton()->framebuffer_format_create_empty(); + _mesh_compile_pipeline_for_surface(p_surface.shader, p_surface.mesh_surface, false, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs); + } + + // The dedicated depth passes use a different version of the surface and the shader. 
+ pipeline_key.primitive_type = mesh_storage->mesh_surface_get_primitive(p_surface.mesh_surface_shadow); + pipeline_key.version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS; + pipeline_key.framebuffer_format_id = _get_depth_framebuffer_format_for_pipeline(buffers_can_be_storage, RD::TextureSamples(p_global.texture_samples), false, false); + _mesh_compile_pipeline_for_surface(p_surface.shader_shadow, p_surface.mesh_surface_shadow, true, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs); + + if (p_global.use_shadow_dual_paraboloid) { + pipeline_key.version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_DP; + _mesh_compile_pipeline_for_surface(p_surface.shader_shadow, p_surface.mesh_surface_shadow, true, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs); + } + + if (p_global.use_shadow_cubemaps) { + pipeline_key.version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS; + pipeline_key.framebuffer_format_id = _get_shadow_cubemap_framebuffer_format_for_pipeline(); + _mesh_compile_pipeline_for_surface(p_surface.shader_shadow, p_surface.mesh_surface_shadow, true, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs); + } + + // Atlas shadowmaps (omni lights) can be in both 16-bit and 32-bit versions. + const uint32_t use_16_bits_start = p_global.use_32_bit_shadows ? 0 : 1; + const uint32_t use_16_bits_iterations = p_global.use_16_bit_shadows ? 
2 : 1; /* use_16_bits is 0 for the 32-bit atlas format and 1 for the 16-bit one; the start and end bounds restrict the loop to the bit depths flagged in p_global. */+ for (uint32_t use_16_bits = use_16_bits_start; use_16_bits < use_16_bits_iterations; use_16_bits++) { + pipeline_key.version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS; + pipeline_key.framebuffer_format_id = _get_shadow_atlas_framebuffer_format_for_pipeline(use_16_bits); + _mesh_compile_pipeline_for_surface(p_surface.shader_shadow, p_surface.mesh_surface_shadow, true, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs); + + if (p_global.use_shadow_dual_paraboloid) { + pipeline_key.version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_DP; + _mesh_compile_pipeline_for_surface(p_surface.shader_shadow, p_surface.mesh_surface_shadow, true, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs); + } + } + + if (p_global.use_reflection_probes) { + // Depth pass for reflection probes. Normally this will be redundant as the format is the exact same as the shadow cubemap. + pipeline_key.version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS; + pipeline_key.framebuffer_format_id = _get_reflection_probe_depth_framebuffer_format_for_pipeline(); + _mesh_compile_pipeline_for_surface(p_surface.shader_shadow, p_surface.mesh_surface_shadow, true, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs); + } +} +/* Builds a SurfacePipelineData description from a cached geometry instance surface and compiles its pipelines with RS::PIPELINE_SOURCE_SURFACE as the source. */ +void RenderForwardClustered::_mesh_generate_all_pipelines_for_surface_cache(GeometryInstanceSurfaceDataCache *p_surface_cache, const GlobalPipelineData &p_global) { + bool uses_alpha_pass = (p_surface_cache->flags & GeometryInstanceSurfaceDataCache::FLAG_PASS_ALPHA) != 0; + float multiplied_fade_alpha = p_surface_cache->owner->force_alpha * p_surface_cache->owner->parent_fade_alpha; + bool uses_fade = (multiplied_fade_alpha < FADE_ALPHA_PASS_THRESHOLD) || p_surface_cache->owner->fade_near || p_surface_cache->owner->fade_far; + SurfacePipelineData surface; + surface.mesh_surface = p_surface_cache->surface; + surface.mesh_surface_shadow = p_surface_cache->surface_shadow; + surface.shader =
p_surface_cache->shader; + surface.shader_shadow = p_surface_cache->shader_shadow; + surface.instanced = p_surface_cache->owner->mesh_instance.is_valid(); + surface.uses_opaque = !uses_alpha_pass; + surface.uses_transparent = uses_alpha_pass || uses_fade; /* uses_fade turns on the transparent variants as well, without turning off the opaque ones. */+ surface.uses_depth = (p_surface_cache->flags & (GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH | GeometryInstanceSurfaceDataCache::FLAG_PASS_OPAQUE | GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW)) != 0; + surface.can_use_lightmap = p_surface_cache->owner->lightmap_instance.is_valid() || p_surface_cache->owner->lightmap_sh; + _mesh_compile_pipelines_for_surface(surface, p_global, RS::PIPELINE_SOURCE_SURFACE); +} +/* Flushes pending geometry instance updates, then compiles pipelines: for every tracked surface when the required global pipeline key differs from the last compiled one, otherwise only for the surfaces on the dirty list. */ void RenderForwardClustered::_update_dirty_geometry_instances() { while (geometry_instance_dirty_list.first()) { _geometry_instance_update(geometry_instance_dirty_list.first()->self()); } + + if (global_pipeline_data_required.key != global_pipeline_data_compiled.key) { + // Go through the entire list of surfaces and compile pipelines for everything again. + SelfList *list = geometry_surface_compilation_all_list.first(); + while (list != nullptr) { + GeometryInstanceSurfaceDataCache *surface_cache = list->self(); + _mesh_generate_all_pipelines_for_surface_cache(surface_cache, global_pipeline_data_required); + + if (surface_cache->compilation_dirty_element.in_list()) { + // Remove any elements from the dirty list as they don't need to be processed again. + geometry_surface_compilation_dirty_list.remove(&surface_cache->compilation_dirty_element); + } + + list = list->next(); + } + + global_pipeline_data_compiled.key = global_pipeline_data_required.key; + } else { + // Compile pipelines only for the dirty list.
+ if (!geometry_surface_compilation_dirty_list.first()) { + return; + } + + while (geometry_surface_compilation_dirty_list.first() != nullptr) { + GeometryInstanceSurfaceDataCache *surface_cache = geometry_surface_compilation_dirty_list.first()->self(); + _mesh_generate_all_pipelines_for_surface_cache(surface_cache, global_pipeline_data_compiled); + surface_cache->compilation_dirty_element.remove_from_list(); + } + } } void RenderForwardClustered::_geometry_instance_dependency_changed(Dependency::DependencyChangedNotification p_notification, DependencyTracker *p_tracker) { @@ -4174,6 +4616,66 @@ uint32_t RenderForwardClustered::geometry_instance_get_pair_mask() { return (1 << RS::INSTANCE_VOXEL_GI); } +void RenderForwardClustered::mesh_generate_pipelines(RID p_mesh, bool p_background_compilation) { + RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); + RendererRD::MeshStorage *mesh_storage = RendererRD::MeshStorage::get_singleton(); + RID shadow_mesh = mesh_storage->mesh_get_shadow_mesh(p_mesh); + uint32_t surface_count = 0; + const RID *materials = mesh_storage->mesh_get_surface_count_and_materials(p_mesh, surface_count); + Vector pipeline_pairs; + for (uint32_t i = 0; i < surface_count; i++) { + if (materials[i].is_null()) { + continue; + } + + void *mesh_surface = mesh_storage->mesh_get_surface(p_mesh, i); + void *mesh_surface_shadow = mesh_surface; + SceneShaderForwardClustered::MaterialData *material = static_cast(material_storage->material_get_data(materials[i], RendererRD::MaterialStorage::SHADER_TYPE_3D)); + if (material == nullptr) { + continue; + } + + SceneShaderForwardClustered::ShaderData *shader = material->shader_data; + SceneShaderForwardClustered::ShaderData *shader_shadow = shader; + if (material->shader_data->uses_shared_shadow_material()) { + SceneShaderForwardClustered::MaterialData *material_shadow = static_cast(material_storage->material_get_data(scene_shader.default_material, 
RendererRD::MaterialStorage::SHADER_TYPE_3D)); + if (material_shadow != nullptr) { + shader_shadow = material_shadow->shader_data; + if (shadow_mesh.is_valid()) { + mesh_surface_shadow = mesh_storage->mesh_get_surface(shadow_mesh, i); + } + } + } + + if (!shader->is_valid()) { + continue; + } + + SurfacePipelineData surface; + surface.mesh_surface = mesh_surface; + surface.mesh_surface_shadow = mesh_surface_shadow; + surface.shader = shader; + surface.shader_shadow = shader_shadow; + surface.instanced = mesh_storage->mesh_needs_instance(p_mesh, true); + surface.uses_opaque = !material->shader_data->uses_alpha_pass(); + surface.uses_transparent = material->shader_data->uses_alpha_pass(); + surface.uses_depth = surface.uses_opaque || (surface.uses_transparent && material->shader_data->uses_depth_in_alpha_pass()); + surface.can_use_lightmap = mesh_storage->mesh_surface_get_format(mesh_surface) & RS::ARRAY_FORMAT_TEX_UV2; + _mesh_compile_pipelines_for_surface(surface, global_pipeline_data_required, RS::PIPELINE_SOURCE_MESH, &pipeline_pairs); + } + + // Try to retrieve all the pipeline pairs that were compiled. This will force the loader to wait on all ubershader pipelines to be ready. 
+ if (!p_background_compilation && !pipeline_pairs.is_empty()) { + for (ShaderPipelinePair pair : pipeline_pairs) { + pair.first->pipeline_hash_map.get_pipeline(pair.second, pair.second.hash(), true, RS::PIPELINE_SOURCE_MESH); + } + } +} + +uint32_t RenderForwardClustered::get_pipeline_compilations(RS::PipelineSource p_source) { + return scene_shader.get_pipeline_compilations(p_source); +} + void RenderForwardClustered::GeometryInstanceForwardClustered::pair_voxel_gi_instances(const RID *p_voxel_gi_instances, uint32_t p_voxel_gi_instance_count) { if (p_voxel_gi_instance_count > 0) { voxel_gi_instances[0] = p_voxel_gi_instances[0]; @@ -4195,54 +4697,23 @@ void RenderForwardClustered::GeometryInstanceForwardClustered::set_softshadow_pr } void RenderForwardClustered::_update_shader_quality_settings() { - Vector spec_constants; - - RD::PipelineSpecializationConstant sc; - sc.type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT; - - sc.constant_id = SPEC_CONSTANT_SOFT_SHADOW_SAMPLES; - sc.int_value = soft_shadow_samples_get(); - - spec_constants.push_back(sc); - - sc.constant_id = SPEC_CONSTANT_PENUMBRA_SHADOW_SAMPLES; - sc.int_value = penumbra_shadow_samples_get(); - - spec_constants.push_back(sc); - - sc.constant_id = SPEC_CONSTANT_DIRECTIONAL_SOFT_SHADOW_SAMPLES; - sc.int_value = directional_soft_shadow_samples_get(); - - spec_constants.push_back(sc); - - sc.constant_id = SPEC_CONSTANT_DIRECTIONAL_PENUMBRA_SHADOW_SAMPLES; - sc.int_value = directional_penumbra_shadow_samples_get(); - - spec_constants.push_back(sc); - - sc.type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL; - sc.constant_id = SPEC_CONSTANT_DECAL_FILTER; - sc.bool_value = decals_get_filter() == RS::DECAL_FILTER_NEAREST_MIPMAPS || + SceneShaderForwardClustered::ShaderSpecialization specialization = {}; + specialization.decal_use_mipmaps = decals_get_filter() == RS::DECAL_FILTER_NEAREST_MIPMAPS || decals_get_filter() == RS::DECAL_FILTER_LINEAR_MIPMAPS || decals_get_filter() == 
RS::DECAL_FILTER_NEAREST_MIPMAPS_ANISOTROPIC || decals_get_filter() == RS::DECAL_FILTER_LINEAR_MIPMAPS_ANISOTROPIC; - - spec_constants.push_back(sc); - - sc.constant_id = SPEC_CONSTANT_PROJECTOR_FILTER; - sc.bool_value = light_projectors_get_filter() == RS::LIGHT_PROJECTOR_FILTER_NEAREST_MIPMAPS || + ; + specialization.projector_use_mipmaps = light_projectors_get_filter() == RS::LIGHT_PROJECTOR_FILTER_NEAREST_MIPMAPS || light_projectors_get_filter() == RS::LIGHT_PROJECTOR_FILTER_LINEAR_MIPMAPS || light_projectors_get_filter() == RS::LIGHT_PROJECTOR_FILTER_NEAREST_MIPMAPS_ANISOTROPIC || light_projectors_get_filter() == RS::LIGHT_PROJECTOR_FILTER_LINEAR_MIPMAPS_ANISOTROPIC; - spec_constants.push_back(sc); - - sc.constant_id = SPEC_CONSTANT_USE_LIGHTMAP_BICUBIC_FILTER; - sc.bool_value = lightmap_filter_bicubic_get(); - - spec_constants.push_back(sc); - - scene_shader.set_default_specialization_constants(spec_constants); + specialization.soft_shadow_samples = soft_shadow_samples_get(); + specialization.penumbra_shadow_samples = penumbra_shadow_samples_get(); + specialization.directional_soft_shadow_samples = directional_soft_shadow_samples_get(); + specialization.directional_penumbra_shadow_samples = directional_penumbra_shadow_samples_get(); + specialization.use_lightmap_bicubic_filter = lightmap_filter_bicubic_get(); + scene_shader.set_default_specialization(specialization); base_uniforms_changed(); //also need this } diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h index 5d14653db6bb..d64ae200c4a2 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h @@ -65,17 +65,6 @@ class RenderForwardClustered : public RendererSceneRenderRD { MATERIAL_UNIFORM_SET = 3, }; - enum { - SPEC_CONSTANT_SOFT_SHADOW_SAMPLES = 6, - 
SPEC_CONSTANT_PENUMBRA_SHADOW_SAMPLES = 7, - SPEC_CONSTANT_DIRECTIONAL_SOFT_SHADOW_SAMPLES = 8, - SPEC_CONSTANT_DIRECTIONAL_PENUMBRA_SHADOW_SAMPLES = 9, - SPEC_CONSTANT_DECAL_FILTER = 10, - SPEC_CONSTANT_PROJECTOR_FILTER = 11, - SPEC_CONSTANT_USE_DEPTH_FOG = 12, - SPEC_CONSTANT_USE_LIGHTMAP_BICUBIC_FILTER = 13, - }; - enum { SDFGI_MAX_CASCADES = 8, MAX_VOXEL_GI_INSTANCESS = 8, @@ -96,6 +85,7 @@ class RenderForwardClustered : public RendererSceneRenderRD { SceneShaderForwardClustered scene_shader; +public: /* Framebuffer */ class RenderBufferDataForwardClustered : public RenderBufferCustomDataRD { @@ -155,8 +145,16 @@ class RenderForwardClustered : public RendererSceneRenderRD { virtual void configure(RenderSceneBuffersRD *p_render_buffers) override; virtual void free_data() override; + + static RD::DataFormat get_specular_format(); + static uint32_t get_specular_usage_bits(bool p_resolve, bool p_msaa, bool p_storage); + static RD::DataFormat get_normal_roughness_format(); + static uint32_t get_normal_roughness_usage_bits(bool p_resolve, bool p_msaa, bool p_storage); + static RD::DataFormat get_voxelgi_format(); + static uint32_t get_voxelgi_usage_bits(bool p_resolve, bool p_msaa, bool p_storage); }; +private: virtual void setup_render_buffer_data(Ref p_render_buffers) override; RID render_base_uniform_set; @@ -183,6 +181,7 @@ class RenderForwardClustered : public RendererSceneRenderRD { PASS_MODE_DEPTH_NORMAL_ROUGHNESS_VOXEL_GI, PASS_MODE_DEPTH_MATERIAL, PASS_MODE_SDF, + PASS_MODE_MAX }; enum ColorPassFlags { @@ -212,9 +211,9 @@ class RenderForwardClustered : public RendererSceneRenderRD { RD::FramebufferFormatID framebuffer_format = 0; uint32_t element_offset = 0; bool use_directional_soft_shadow = false; - uint32_t spec_constant_base_flags = 0; + SceneShaderForwardClustered::ShaderSpecialization base_specialization = {}; - RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, RenderElementInfo *p_element_info, int p_element_count, bool 
p_reverse_cull, PassMode p_pass_mode, uint32_t p_color_pass_flags, bool p_no_gi, bool p_use_directional_soft_shadows, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), float p_lod_distance_multiplier = 0.0, float p_screen_mesh_lod_threshold = 0.0, uint32_t p_view_count = 1, uint32_t p_element_offset = 0, uint32_t p_spec_constant_base_flags = 0) { + RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, RenderElementInfo *p_element_info, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, uint32_t p_color_pass_flags, bool p_no_gi, bool p_use_directional_soft_shadows, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), float p_lod_distance_multiplier = 0.0, float p_screen_mesh_lod_threshold = 0.0, uint32_t p_view_count = 1, uint32_t p_element_offset = 0, SceneShaderForwardClustered::ShaderSpecialization p_base_specialization = {}) { elements = p_elements; element_info = p_element_info; element_count = p_element_count; @@ -230,7 +229,7 @@ class RenderForwardClustered : public RendererSceneRenderRD { screen_mesh_lod_threshold = p_screen_mesh_lod_threshold; element_offset = p_element_offset; use_directional_soft_shadow = p_use_directional_soft_shadows; - spec_constant_base_flags = p_spec_constant_base_flags; + base_specialization = p_base_specialization; } }; @@ -293,11 +292,17 @@ class RenderForwardClustered : public RendererSceneRenderRD { uint32_t volumetric_fog_pad; }; + struct PushConstantUbershader { + SceneShaderForwardClustered::ShaderSpecialization specialization; + SceneShaderForwardClustered::UbershaderConstants constants; + }; + struct PushConstant { uint32_t base_index; // uint32_t uv_offset; //packed uint32_t multimesh_motion_vectors_current_offset; uint32_t multimesh_motion_vectors_previous_offset; + PushConstantUbershader ubershader; }; struct InstanceData { @@ -450,6 +455,11 @@ class RenderForwardClustered : public 
RendererSceneRenderRD { GeometryInstanceSurfaceDataCache *next = nullptr; GeometryInstanceForwardClustered *owner = nullptr; + SelfList compilation_dirty_element; + SelfList compilation_all_element; + + GeometryInstanceSurfaceDataCache() : + compilation_dirty_element(this), compilation_all_element(this) {} }; class GeometryInstanceForwardClustered : public RenderGeometryInstanceBase { @@ -500,15 +510,61 @@ class RenderForwardClustered : public RendererSceneRenderRD { static void _geometry_instance_dependency_deleted(const RID &p_dependency, DependencyTracker *p_tracker); SelfList::List geometry_instance_dirty_list; + SelfList::List geometry_surface_compilation_dirty_list; + SelfList::List geometry_surface_compilation_all_list; PagedAllocator geometry_instance_alloc; PagedAllocator geometry_instance_surface_alloc; PagedAllocator geometry_instance_lightmap_sh; + struct SurfacePipelineData { + void *mesh_surface = nullptr; + void *mesh_surface_shadow = nullptr; + SceneShaderForwardClustered::ShaderData *shader = nullptr; + SceneShaderForwardClustered::ShaderData *shader_shadow = nullptr; + bool instanced = false; + bool uses_opaque = false; + bool uses_transparent = false; + bool uses_depth = false; + bool can_use_lightmap = false; + }; + + struct GlobalPipelineData { + union { + struct { + uint32_t texture_samples : 3; + uint32_t use_reflection_probes : 1; + uint32_t use_separate_specular : 1; + uint32_t use_motion_vectors : 1; + uint32_t use_normal_and_roughness : 1; + uint32_t use_lightmaps : 1; + uint32_t use_voxelgi : 1; + uint32_t use_sdfgi : 1; + uint32_t use_multiview : 1; + uint32_t use_16_bit_shadows : 1; + uint32_t use_32_bit_shadows : 1; + uint32_t use_shadow_cubemaps : 1; + uint32_t use_shadow_dual_paraboloid : 1; + }; + + uint32_t key; + }; + }; + + GlobalPipelineData global_pipeline_data_compiled = {}; + GlobalPipelineData global_pipeline_data_required = {}; + + typedef Pair ShaderPipelinePair; + + void 
_update_global_pipeline_data_requirements_from_project(); + void _update_global_pipeline_data_requirements_from_light_storage(); void _geometry_instance_add_surface_with_material(GeometryInstanceForwardClustered *ginstance, uint32_t p_surface, SceneShaderForwardClustered::MaterialData *p_material, uint32_t p_material_id, uint32_t p_shader_id, RID p_mesh); void _geometry_instance_add_surface_with_material_chain(GeometryInstanceForwardClustered *ginstance, uint32_t p_surface, SceneShaderForwardClustered::MaterialData *p_material, RID p_mat_src, RID p_mesh); void _geometry_instance_add_surface(GeometryInstanceForwardClustered *ginstance, uint32_t p_surface, RID p_material, RID p_mesh); void _geometry_instance_update(RenderGeometryInstance *p_geometry_instance); + void _mesh_compile_pipeline_for_surface(SceneShaderForwardClustered::ShaderData *p_shader, void *p_mesh_surface, bool p_ubershader, bool p_instanced_surface, RS::PipelineSource p_source, SceneShaderForwardClustered::ShaderData::PipelineKey &r_pipeline_key, Vector *r_pipeline_pairs = nullptr); + void _mesh_compile_pipelines_for_surface(const SurfacePipelineData &p_surface, const GlobalPipelineData &p_global, RS::PipelineSource p_source, Vector *r_pipeline_pairs = nullptr); + void _mesh_generate_all_pipelines_for_surface_cache(GeometryInstanceSurfaceDataCache *p_surface_cache, const GlobalPipelineData &p_global); void _update_dirty_geometry_instances(); /* Render List */ @@ -663,8 +719,15 @@ class RenderForwardClustered : public RendererSceneRenderRD { virtual uint32_t geometry_instance_get_pair_mask() override; + /* PIPELINES */ + + virtual void mesh_generate_pipelines(RID p_mesh, bool p_background_compilation) override; + virtual uint32_t get_pipeline_compilations(RS::PipelineSource p_source) override; + virtual bool free(RID p_rid) override; + virtual void update() override; + RenderForwardClustered(); ~RenderForwardClustered(); }; diff --git 
a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp index 6846c3f693fe..c37f5980682a 100644 --- a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp @@ -41,9 +41,9 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) { //compile code = p_code; - valid = false; ubo_size = 0; uniforms.clear(); + _clear_vertex_input_mask_cache(); if (code.is_empty()) { return; //just invalid, but no error @@ -51,9 +51,9 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) { ShaderCompiler::GeneratedCode gen_code; - int blend_mode = BLEND_MODE_MIX; - int depth_testi = DEPTH_TEST_ENABLED; - int alpha_antialiasing_mode = ALPHA_ANTIALIASING_OFF; + blend_mode = BLEND_MODE_MIX; + depth_testi = DEPTH_TEST_ENABLED; + alpha_antialiasing_mode = ALPHA_ANTIALIASING_OFF; int cull_modei = CULL_BACK; uses_point_size = false; @@ -66,8 +66,8 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) { uses_roughness = false; uses_normal = false; uses_tangent = false; - bool uses_normal_map = false; - bool wireframe = false; + uses_normal_map = false; + wireframe = false; unshaded = false; uses_vertex = false; @@ -90,7 +90,7 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) { actions.render_mode_values["blend_mix"] = Pair(&blend_mode, BLEND_MODE_MIX); actions.render_mode_values["blend_sub"] = Pair(&blend_mode, BLEND_MODE_SUB); actions.render_mode_values["blend_mul"] = Pair(&blend_mode, BLEND_MODE_MUL); - actions.render_mode_values["blend_premul_alpha"] = Pair(&blend_mode, BLEND_MODE_PREMULT_ALPHA); + actions.render_mode_values["blend_premul_alpha"] = Pair(&blend_mode, BLEND_MODE_PREMULTIPLIED_ALPHA); actions.render_mode_values["alpha_to_coverage"] = 
Pair(&alpha_antialiasing_mode, ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE); actions.render_mode_values["alpha_to_coverage_and_one"] = Pair(&alpha_antialiasing_mode, ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE_AND_TO_ONE); @@ -141,12 +141,12 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) { actions.uniforms = &uniforms; - SceneShaderForwardClustered *shader_singleton = (SceneShaderForwardClustered *)SceneShaderForwardClustered::singleton; - Error err = shader_singleton->compiler.compile(RS::SHADER_SPATIAL, code, &actions, path, gen_code); + MutexLock lock(SceneShaderForwardClustered::singleton_mutex); + Error err = SceneShaderForwardClustered::singleton->compiler.compile(RS::SHADER_SPATIAL, code, &actions, path, gen_code); ERR_FAIL_COND_MSG(err != OK, "Shader compilation failed."); if (version.is_null()) { - version = shader_singleton->shader.version_create(); + version = SceneShaderForwardClustered::singleton->shader.version_create(); } depth_draw = DepthDraw(depth_drawi); @@ -178,95 +178,119 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) { print_line("\n**vertex_globals:\n" + gen_code.stage_globals[ShaderCompiler::STAGE_VERTEX]); print_line("\n**fragment_globals:\n" + gen_code.stage_globals[ShaderCompiler::STAGE_FRAGMENT]); #endif - shader_singleton->shader.version_set_code(version, gen_code.code, gen_code.uniforms, gen_code.stage_globals[ShaderCompiler::STAGE_VERTEX], gen_code.stage_globals[ShaderCompiler::STAGE_FRAGMENT], gen_code.defines); - ERR_FAIL_COND(!shader_singleton->shader.version_is_valid(version)); + SceneShaderForwardClustered::singleton->shader.version_set_code(version, gen_code.code, gen_code.uniforms, gen_code.stage_globals[ShaderCompiler::STAGE_VERTEX], gen_code.stage_globals[ShaderCompiler::STAGE_FRAGMENT], gen_code.defines); ubo_size = gen_code.uniform_total_size; ubo_offsets = gen_code.uniform_offsets; texture_uniforms = gen_code.texture_uniforms; - //blend modes + 
pipeline_hash_map.clear_pipelines(); - // if any form of Alpha Antialiasing is enabled, set the blend mode to alpha to coverage + // If any form of Alpha Antialiasing is enabled, set the blend mode to alpha to coverage. if (alpha_antialiasing_mode != ALPHA_ANTIALIASING_OFF) { blend_mode = BLEND_MODE_ALPHA_TO_COVERAGE; } - RD::PipelineColorBlendState::Attachment blend_attachment; + uses_blend_alpha = blend_mode_uses_blend_alpha(BlendMode(blend_mode)); +} - switch (blend_mode) { - case BLEND_MODE_MIX: { - blend_attachment.enable_blend = true; - blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; - blend_attachment.color_blend_op = RD::BLEND_OP_ADD; - blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; +bool SceneShaderForwardClustered::ShaderData::is_animated() const { + return (uses_fragment_time && uses_discard) || (uses_vertex_time && uses_vertex); +} - } break; - case BLEND_MODE_ADD: { - blend_attachment.enable_blend = true; - blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; - blend_attachment.color_blend_op = RD::BLEND_OP_ADD; - blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE; - blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - uses_blend_alpha = true; //force alpha used because of blend +bool SceneShaderForwardClustered::ShaderData::casts_shadows() const { + bool has_read_screen_alpha = uses_screen_texture || uses_depth_texture || uses_normal_texture; + bool has_base_alpha = (uses_alpha && (!uses_alpha_clip || uses_alpha_antialiasing)) || has_read_screen_alpha; + bool has_alpha = has_base_alpha || uses_blend_alpha; - } break; - case 
BLEND_MODE_SUB: { - blend_attachment.enable_blend = true; - blend_attachment.alpha_blend_op = RD::BLEND_OP_REVERSE_SUBTRACT; - blend_attachment.color_blend_op = RD::BLEND_OP_REVERSE_SUBTRACT; - blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE; - blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - uses_blend_alpha = true; //force alpha used because of blend + return !has_alpha || (uses_depth_prepass_alpha && !(depth_draw == DEPTH_DRAW_DISABLED || depth_test == DEPTH_TEST_DISABLED)); +} +RS::ShaderNativeSourceCode SceneShaderForwardClustered::ShaderData::get_native_source_code() const { + if (version.is_valid()) { + MutexLock lock(SceneShaderForwardClustered::singleton_mutex); + return SceneShaderForwardClustered::singleton->shader.version_get_native_source_code(version); + } else { + return RS::ShaderNativeSourceCode(); + } +} + +SceneShaderForwardClustered::ShaderVersion SceneShaderForwardClustered::ShaderData::_get_shader_version(PipelineVersion p_pipeline_version, uint32_t p_color_pass_flags, bool p_ubershader) const { + uint32_t ubershader_base = p_ubershader ? 
SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL : 0; + switch (p_pipeline_version) { + case PIPELINE_VERSION_DEPTH_PASS: + return ShaderVersion(SHADER_VERSION_DEPTH_PASS + ubershader_base); + case PIPELINE_VERSION_DEPTH_PASS_DP: + return ShaderVersion(SHADER_VERSION_DEPTH_PASS_DP + ubershader_base); + case PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS: + return ShaderVersion(SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS + ubershader_base); + case PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI: + return ShaderVersion(SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI + ubershader_base); + case PIPELINE_VERSION_DEPTH_PASS_MULTIVIEW: + return ShaderVersion(SHADER_VERSION_DEPTH_PASS_MULTIVIEW + ubershader_base); + case PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW: + return ShaderVersion(SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW + ubershader_base); + case PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW: + return ShaderVersion(SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW + ubershader_base); + case PIPELINE_VERSION_DEPTH_PASS_WITH_MATERIAL: + return ShaderVersion(SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL + SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL); + case PIPELINE_VERSION_DEPTH_PASS_WITH_SDF: + return ShaderVersion(SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL + SHADER_VERSION_DEPTH_PASS_WITH_SDF); + case PIPELINE_VERSION_COLOR_PASS: { + int shader_flags = 0; + + if (p_ubershader) { + shader_flags |= SHADER_COLOR_PASS_FLAG_UBERSHADER; + } + + if (p_color_pass_flags & PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR) { + shader_flags |= SHADER_COLOR_PASS_FLAG_SEPARATE_SPECULAR; + } + + if (p_color_pass_flags & PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS) { + shader_flags |= SHADER_COLOR_PASS_FLAG_MOTION_VECTORS; + } + + if (p_color_pass_flags & PIPELINE_COLOR_PASS_FLAG_LIGHTMAP) { + shader_flags |= SHADER_COLOR_PASS_FLAG_LIGHTMAP; + } + + if 
(p_color_pass_flags & PIPELINE_COLOR_PASS_FLAG_MULTIVIEW) { + shader_flags |= SHADER_COLOR_PASS_FLAG_MULTIVIEW; + } + + return ShaderVersion(SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL + SHADER_VERSION_COLOR_PASS + shader_flags); } break; - case BLEND_MODE_MUL: { - blend_attachment.enable_blend = true; - blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; - blend_attachment.color_blend_op = RD::BLEND_OP_ADD; - blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_DST_COLOR; - blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ZERO; - blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_DST_ALPHA; - blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ZERO; - uses_blend_alpha = true; //force alpha used because of blend - } break; - case BLEND_MODE_ALPHA_TO_COVERAGE: { - blend_attachment.enable_blend = true; - blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; - blend_attachment.color_blend_op = RD::BLEND_OP_ADD; - blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ZERO; - } break; - case BLEND_MODE_PREMULT_ALPHA: { - blend_attachment.enable_blend = true; - blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; - blend_attachment.color_blend_op = RD::BLEND_OP_ADD; - blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_ONE; - blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - uses_blend_alpha = true; // Force alpha used because of blend. 
+ default: { + DEV_ASSERT(false && "Unknown pipeline version."); + return ShaderVersion(0); } break; } +} + +void SceneShaderForwardClustered::ShaderData::_create_pipeline(PipelineKey p_pipeline_key) { +#if PRINT_PIPELINE_COMPILATION_KEYS + print_line( + "HASH:", p_pipeline_key.hash(), + "VERSION:", version, + "VERTEX:", p_pipeline_key.vertex_format_id, + "FRAMEBUFFER:", p_pipeline_key.framebuffer_format_id, + "CULL:", p_pipeline_key.cull_mode, + "PRIMITIVE:", p_pipeline_key.primitive_type, + "VERSION:", p_pipeline_key.version, + "PASS FLAGS:", p_pipeline_key.color_pass_flags, + "SPEC PACKED #0:", p_pipeline_key.shader_specialization.packed_0, + "WIREFRAME:", p_pipeline_key.wireframe); +#endif // Color pass -> attachment 0: Color/Diffuse, attachment 1: Separate Specular, attachment 2: Motion Vectors + RD::PipelineColorBlendState::Attachment blend_attachment = blend_mode_to_blend_attachment(BlendMode(blend_mode)); RD::PipelineColorBlendState blend_state_color_blend; blend_state_color_blend.attachments = { blend_attachment, RD::PipelineColorBlendState::Attachment(), RD::PipelineColorBlendState::Attachment() }; RD::PipelineColorBlendState blend_state_color_opaque = RD::PipelineColorBlendState::create_disabled(3); RD::PipelineColorBlendState blend_state_depth_normal_roughness = RD::PipelineColorBlendState::create_disabled(1); RD::PipelineColorBlendState blend_state_depth_normal_roughness_giprobe = RD::PipelineColorBlendState::create_disabled(2); - //update pipelines - RD::PipelineDepthStencilState depth_stencil_state; if (depth_test != DEPTH_TEST_DISABLED) { @@ -276,171 +300,162 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) { } bool depth_pre_pass_enabled = bool(GLOBAL_GET("rendering/driver/depth_prepass/enable")); - for (int i = 0; i < CULL_VARIANT_MAX; i++) { - RD::PolygonCullMode cull_mode_rd_table[CULL_VARIANT_MAX][3] = { - { RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_FRONT, RD::POLYGON_CULL_BACK }, - { RD::POLYGON_CULL_DISABLED, 
RD::POLYGON_CULL_BACK, RD::POLYGON_CULL_FRONT }, - { RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_DISABLED } - }; + RD::RenderPrimitive primitive_rd_table[RS::PRIMITIVE_MAX] = { + RD::RENDER_PRIMITIVE_POINTS, + RD::RENDER_PRIMITIVE_LINES, + RD::RENDER_PRIMITIVE_LINESTRIPS, + RD::RENDER_PRIMITIVE_TRIANGLES, + RD::RENDER_PRIMITIVE_TRIANGLE_STRIPS, + }; + + RD::RenderPrimitive primitive_rd = uses_point_size ? RD::RENDER_PRIMITIVE_POINTS : primitive_rd_table[p_pipeline_key.primitive_type]; + + RD::PipelineRasterizationState raster_state; + raster_state.cull_mode = p_pipeline_key.cull_mode; + raster_state.wireframe = wireframe || p_pipeline_key.wireframe; + + RD::PipelineMultisampleState multisample_state; + multisample_state.sample_count = RD::get_singleton()->framebuffer_format_get_texture_samples(p_pipeline_key.framebuffer_format_id, 0); + + RD::PipelineColorBlendState blend_state; + if (p_pipeline_key.version == PIPELINE_VERSION_COLOR_PASS) { + if (p_pipeline_key.color_pass_flags & PIPELINE_COLOR_PASS_FLAG_TRANSPARENT) { + if (alpha_antialiasing_mode == ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE) { + multisample_state.enable_alpha_to_coverage = true; + } else if (alpha_antialiasing_mode == ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE_AND_TO_ONE) { + multisample_state.enable_alpha_to_coverage = true; + multisample_state.enable_alpha_to_one = true; + } - RD::PolygonCullMode cull_mode_rd = cull_mode_rd_table[i][cull_mode]; - - for (int j = 0; j < RS::PRIMITIVE_MAX; j++) { - RD::RenderPrimitive primitive_rd_table[RS::PRIMITIVE_MAX] = { - RD::RENDER_PRIMITIVE_POINTS, - RD::RENDER_PRIMITIVE_LINES, - RD::RENDER_PRIMITIVE_LINESTRIPS, - RD::RENDER_PRIMITIVE_TRIANGLES, - RD::RENDER_PRIMITIVE_TRIANGLE_STRIPS, - }; - - RD::RenderPrimitive primitive_rd = uses_point_size ? 
RD::RENDER_PRIMITIVE_POINTS : primitive_rd_table[j]; - - for (int k = 0; k < PIPELINE_VERSION_MAX; k++) { - ShaderVersion shader_version; - static const ShaderVersion shader_version_table[PIPELINE_VERSION_MAX] = { - SHADER_VERSION_DEPTH_PASS, - SHADER_VERSION_DEPTH_PASS_DP, - SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS, - SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI, - SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL, - SHADER_VERSION_DEPTH_PASS_WITH_SDF, - SHADER_VERSION_DEPTH_PASS_MULTIVIEW, - SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW, - SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW, - SHADER_VERSION_COLOR_PASS, - }; - - shader_version = shader_version_table[k]; - - if (!static_cast(singleton)->shader.is_variant_enabled(shader_version)) { - continue; - } - RD::PipelineRasterizationState raster_state; - raster_state.cull_mode = cull_mode_rd; - raster_state.wireframe = wireframe; - - if (k == PIPELINE_VERSION_COLOR_PASS) { - for (int l = 0; l < PIPELINE_COLOR_PASS_FLAG_COUNT; l++) { - if (!shader_singleton->valid_color_pass_pipelines[l]) { - continue; - } - - RD::PipelineDepthStencilState depth_stencil = depth_stencil_state; - - RD::PipelineColorBlendState blend_state; - RD::PipelineMultisampleState multisample_state; - - int shader_flags = 0; - if (l & PIPELINE_COLOR_PASS_FLAG_TRANSPARENT) { - if (alpha_antialiasing_mode == ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE) { - multisample_state.enable_alpha_to_coverage = true; - } else if (alpha_antialiasing_mode == ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE_AND_TO_ONE) { - multisample_state.enable_alpha_to_coverage = true; - multisample_state.enable_alpha_to_one = true; - } - - blend_state = blend_state_color_blend; - - if (depth_draw == DEPTH_DRAW_OPAQUE) { - depth_stencil.enable_depth_write = false; //alpha does not draw depth - } - } else { - blend_state = blend_state_color_opaque; - - if (depth_pre_pass_enabled) { - // We already have a depth from the depth 
pre-pass, there is no need to write it again. - // In addition we can use COMPARE_OP_EQUAL instead of COMPARE_OP_LESS_OR_EQUAL. - // This way we can use the early depth test to discard transparent fragments before the fragment shader even starts. - depth_stencil.depth_compare_operator = RD::COMPARE_OP_EQUAL; - depth_stencil.enable_depth_write = false; - } - - if (l & PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR) { - shader_flags |= SHADER_COLOR_PASS_FLAG_SEPARATE_SPECULAR; - } - } - - if (l & PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS) { - shader_flags |= SHADER_COLOR_PASS_FLAG_MOTION_VECTORS; - } - - if (l & PIPELINE_COLOR_PASS_FLAG_LIGHTMAP) { - shader_flags |= SHADER_COLOR_PASS_FLAG_LIGHTMAP; - } - - if (l & PIPELINE_COLOR_PASS_FLAG_MULTIVIEW) { - shader_flags |= SHADER_COLOR_PASS_FLAG_MULTIVIEW; - } - - int variant = shader_version + shader_flags; - - if (!static_cast(singleton)->shader.is_variant_enabled(variant)) { - continue; - } - - RID shader_variant = shader_singleton->shader.version_get_shader(version, variant); - color_pipelines[i][j][l].setup(shader_variant, primitive_rd, raster_state, multisample_state, depth_stencil, blend_state, 0, singleton->default_specialization_constants); - } - } else { - RD::PipelineColorBlendState blend_state; - RD::PipelineDepthStencilState depth_stencil = depth_stencil_state; - RD::PipelineMultisampleState multisample_state; - - if (k == PIPELINE_VERSION_DEPTH_PASS || k == PIPELINE_VERSION_DEPTH_PASS_DP || k == PIPELINE_VERSION_DEPTH_PASS_MULTIVIEW) { - //none, leave empty - } else if (k == PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS || k == PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW) { - blend_state = blend_state_depth_normal_roughness; - } else if (k == PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI || k == PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW) { - blend_state = blend_state_depth_normal_roughness_giprobe; - } else if (k == 
PIPELINE_VERSION_DEPTH_PASS_WITH_MATERIAL) { - blend_state = RD::PipelineColorBlendState::create_disabled(5); //writes to normal and roughness in opaque way - } else if (k == PIPELINE_VERSION_DEPTH_PASS_WITH_SDF) { - blend_state = RD::PipelineColorBlendState(); //no color targets for SDF - } - - RID shader_variant = shader_singleton->shader.version_get_shader(version, shader_version); - pipelines[i][j][k].setup(shader_variant, primitive_rd, raster_state, multisample_state, depth_stencil, blend_state, 0, singleton->default_specialization_constants); - } + blend_state = blend_state_color_blend; + + if (depth_draw == DEPTH_DRAW_OPAQUE) { + depth_stencil_state.enable_depth_write = false; //alpha does not draw depth } + } else { + blend_state = blend_state_color_opaque; + + if (depth_pre_pass_enabled) { + // We already have a depth from the depth pre-pass, there is no need to write it again. + // In addition we can use COMPARE_OP_EQUAL instead of COMPARE_OP_LESS_OR_EQUAL. + // This way we can use the early depth test to discard transparent fragments before the fragment shader even starts. + depth_stencil_state.depth_compare_operator = RD::COMPARE_OP_EQUAL; + depth_stencil_state.enable_depth_write = false; + } + } + } else { + switch (p_pipeline_key.version) { + case PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS: + case PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW: + blend_state = blend_state_depth_normal_roughness; + break; + case PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI: + case PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW: + blend_state = blend_state_depth_normal_roughness_giprobe; + break; + case PIPELINE_VERSION_DEPTH_PASS_WITH_MATERIAL: + // Writes to normal and roughness in opaque way. 
+ blend_state = RD::PipelineColorBlendState::create_disabled(5); + break; + case PIPELINE_VERSION_DEPTH_PASS: + case PIPELINE_VERSION_DEPTH_PASS_DP: + case PIPELINE_VERSION_DEPTH_PASS_MULTIVIEW: + case PIPELINE_VERSION_DEPTH_PASS_WITH_SDF: + default: + break; } } - valid = true; + // Convert the specialization from the key to pipeline specialization constants. + Vector specialization_constants; + RD::PipelineSpecializationConstant sc; + sc.constant_id = 0; + sc.int_value = p_pipeline_key.shader_specialization.packed_0; + sc.type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT; + specialization_constants.push_back(sc); + + RID shader_rid = get_shader_variant(p_pipeline_key.version, p_pipeline_key.color_pass_flags, p_pipeline_key.ubershader); + ERR_FAIL_COND(shader_rid.is_null()); + + RID pipeline = RD::get_singleton()->render_pipeline_create(shader_rid, p_pipeline_key.framebuffer_format_id, p_pipeline_key.vertex_format_id, primitive_rd, raster_state, multisample_state, depth_stencil_state, blend_state, 0, 0, specialization_constants); + ERR_FAIL_COND(pipeline.is_null()); + + pipeline_hash_map.add_compiled_pipeline(p_pipeline_key.hash(), pipeline); } -bool SceneShaderForwardClustered::ShaderData::is_animated() const { - return (uses_fragment_time && uses_discard) || (uses_vertex_time && uses_vertex); +RD::PolygonCullMode SceneShaderForwardClustered::ShaderData::get_cull_mode_from_cull_variant(CullVariant p_cull_variant) { + const RD::PolygonCullMode cull_mode_rd_table[CULL_VARIANT_MAX][3] = { + { RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_FRONT, RD::POLYGON_CULL_BACK }, + { RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_BACK, RD::POLYGON_CULL_FRONT }, + { RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_DISABLED } + }; + + return cull_mode_rd_table[p_cull_variant][cull_mode]; } -bool SceneShaderForwardClustered::ShaderData::casts_shadows() const { - bool has_read_screen_alpha = uses_screen_texture || uses_depth_texture || uses_normal_texture; - 
bool has_base_alpha = (uses_alpha && (!uses_alpha_clip || uses_alpha_antialiasing)) || has_read_screen_alpha; - bool has_alpha = has_base_alpha || uses_blend_alpha; +RID SceneShaderForwardClustered::ShaderData::_get_shader_variant(ShaderVersion p_shader_version) const { + if (version.is_valid()) { + MutexLock lock(SceneShaderForwardClustered::singleton_mutex); + ERR_FAIL_NULL_V(SceneShaderForwardClustered::singleton, RID()); + return SceneShaderForwardClustered::singleton->shader.version_get_shader(version, p_shader_version); + } else { + return RID(); + } +} - return !has_alpha || (uses_depth_prepass_alpha && !(depth_draw == DEPTH_DRAW_DISABLED || depth_test == DEPTH_TEST_DISABLED)); +void SceneShaderForwardClustered::ShaderData::_clear_vertex_input_mask_cache() { + for (uint32_t i = 0; i < VERTEX_INPUT_MASKS_SIZE; i++) { + vertex_input_masks[i].store(0); + } } -RS::ShaderNativeSourceCode SceneShaderForwardClustered::ShaderData::get_native_source_code() const { - SceneShaderForwardClustered *shader_singleton = (SceneShaderForwardClustered *)SceneShaderForwardClustered::singleton; +RID SceneShaderForwardClustered::ShaderData::get_shader_variant(PipelineVersion p_pipeline_version, uint32_t p_color_pass_flags, bool p_ubershader) const { + return _get_shader_variant(_get_shader_version(p_pipeline_version, p_color_pass_flags, p_ubershader)); +} + +uint64_t SceneShaderForwardClustered::ShaderData::get_vertex_input_mask(PipelineVersion p_pipeline_version, uint32_t p_color_pass_flags, bool p_ubershader) { + // Vertex input masks require knowledge of the shader. Since querying the shader can be expensive due to high contention and the necessary mutex, we cache the result instead. 
+ ShaderVersion shader_version = _get_shader_version(p_pipeline_version, p_color_pass_flags, p_ubershader); + uint64_t input_mask = vertex_input_masks[shader_version].load(std::memory_order_relaxed); + if (input_mask == 0) { + RID shader_rid = _get_shader_variant(shader_version); + ERR_FAIL_COND_V(shader_rid.is_null(), 0); - return shader_singleton->shader.version_get_native_source_code(version); + input_mask = RD::get_singleton()->shader_get_vertex_input_attribute_mask(shader_rid); + vertex_input_masks[shader_version].store(input_mask, std::memory_order_relaxed); + } + + return input_mask; +} + +bool SceneShaderForwardClustered::ShaderData::is_valid() const { + if (version.is_valid()) { + MutexLock lock(SceneShaderForwardClustered::singleton_mutex); + ERR_FAIL_NULL_V(SceneShaderForwardClustered::singleton, false); + return SceneShaderForwardClustered::singleton->shader.version_is_valid(version); + } else { + return false; + } } SceneShaderForwardClustered::ShaderData::ShaderData() : shader_list_element(this) { + pipeline_hash_map.set_creation_object_and_function(this, &ShaderData::_create_pipeline); + pipeline_hash_map.set_compilations(SceneShaderForwardClustered::singleton->pipeline_compilations, &SceneShaderForwardClustered::singleton_mutex); } SceneShaderForwardClustered::ShaderData::~ShaderData() { - SceneShaderForwardClustered *shader_singleton = (SceneShaderForwardClustered *)SceneShaderForwardClustered::singleton; - ERR_FAIL_NULL(shader_singleton); - //pipeline variants will clear themselves if shader is gone + pipeline_hash_map.clear_pipelines(); + if (version.is_valid()) { - shader_singleton->shader.version_free(version); + MutexLock lock(SceneShaderForwardClustered::singleton_mutex); + ERR_FAIL_NULL(SceneShaderForwardClustered::singleton); + SceneShaderForwardClustered::singleton->shader.version_free(version); } } RendererRD::MaterialStorage::ShaderData *SceneShaderForwardClustered::_create_shader_func() { + MutexLock 
lock(SceneShaderForwardClustered::singleton_mutex); ShaderData *shader_data = memnew(ShaderData); singleton->shader_list.add(&shader_data->shader_list_element); return shader_data; @@ -455,9 +470,12 @@ void SceneShaderForwardClustered::MaterialData::set_next_pass(RID p_pass) { } bool SceneShaderForwardClustered::MaterialData::update_parameters(const HashMap &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { - SceneShaderForwardClustered *shader_singleton = (SceneShaderForwardClustered *)SceneShaderForwardClustered::singleton; - - return update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set, shader_singleton->shader.version_get_shader(shader_data->version, 0), RenderForwardClustered::MATERIAL_UNIFORM_SET, true, true); + if (shader_data->version.is_valid()) { + MutexLock lock(SceneShaderForwardClustered::singleton_mutex); + return update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set, SceneShaderForwardClustered::singleton->shader.version_get_shader(shader_data->version, 0), RenderForwardClustered::MATERIAL_UNIFORM_SET, true, true); + } else { + return false; + } } SceneShaderForwardClustered::MaterialData::~MaterialData() { @@ -472,6 +490,7 @@ RendererRD::MaterialStorage::MaterialData *SceneShaderForwardClustered::_create_ } SceneShaderForwardClustered *SceneShaderForwardClustered::singleton = nullptr; +Mutex SceneShaderForwardClustered::singleton_mutex; SceneShaderForwardClustered::SceneShaderForwardClustered() { // there should be only one of these, contained within our RenderFM singleton. 
@@ -498,17 +517,22 @@ void SceneShaderForwardClustered::init(const String p_defines) { { Vector shader_versions; - shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, "\n#define MODE_RENDER_DEPTH\n", true)); // SHADER_VERSION_DEPTH_PASS - shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, "\n#define MODE_RENDER_DEPTH\n#define MODE_DUAL_PARABOLOID\n", true)); // SHADER_VERSION_DEPTH_PASS_DP - shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, "\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n", true)); // SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS - shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_ADVANCED, "\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n#define MODE_RENDER_VOXEL_GI\n", false)); // SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI + for (uint32_t ubershader = 0; ubershader < 2; ubershader++) { + const String base_define = ubershader ? "\n#define UBERSHADER\n" : ""; + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "\n#define MODE_RENDER_DEPTH\n", true)); // SHADER_VERSION_DEPTH_PASS + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_DUAL_PARABOLOID\n", true)); // SHADER_VERSION_DEPTH_PASS_DP + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n", true)); // SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_ADVANCED, base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n#define MODE_RENDER_VOXEL_GI\n", false)); // SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_MULTIVIEW, base_define + "\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n", 
false)); // SHADER_VERSION_DEPTH_PASS_MULTIVIEW + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_MULTIVIEW, base_define + "\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n", false)); // SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_ADVANCED_MULTIVIEW, base_define + "\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n#define MODE_RENDER_VOXEL_GI\n", false)); // SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW + } + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_ADVANCED, "\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_MATERIAL\n", false)); // SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_ADVANCED, "\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_SDF\n", false)); // SHADER_VERSION_DEPTH_PASS_WITH_SDF - shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_MULTIVIEW, "\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n", false)); // SHADER_VERSION_DEPTH_PASS_MULTIVIEW - shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_MULTIVIEW, "\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n", false)); // SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW - shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_MULTIVIEW, "\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n#define MODE_RENDER_VOXEL_GI\n", false)); // SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW Vector color_pass_flags = { + "\n#define UBERSHADER\n", // SHADER_COLOR_PASS_FLAG_UBERSHADER "\n#define MODE_SEPARATE_SPECULAR\n", // SHADER_COLOR_PASS_FLAG_SEPARATE_SPECULAR "\n#define USE_LIGHTMAP\n", // SHADER_COLOR_PASS_FLAG_LIGHTMAP "\n#define USE_MULTIVIEW\n", // 
SHADER_COLOR_PASS_FLAG_MULTIVIEW @@ -545,16 +569,6 @@ void SceneShaderForwardClustered::init(const String p_defines) { } } - // Set flag to true if a combination is valid. - // The only invalid combinations are those that include both TRANSPARENT and SEPARATE_SPECULAR. - for (int i = 0; i < PIPELINE_COLOR_PASS_FLAG_COUNT; i++) { - if ((i & PIPELINE_COLOR_PASS_FLAG_TRANSPARENT) && (i & PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR)) { - valid_color_pass_pipelines[i] = false; - } else { - valid_color_pass_pipelines[i] = true; - } - } - material_storage->shader_set_data_request_function(RendererRD::MaterialStorage::SHADER_TYPE_3D, _create_shader_funcs); material_storage->material_set_data_request_function(RendererRD::MaterialStorage::SHADER_TYPE_3D, _create_material_funcs); @@ -772,8 +786,8 @@ void fragment() { material_storage->material_set_shader(default_material, default_shader); MaterialData *md = static_cast(material_storage->material_get_data(default_material, RendererRD::MaterialStorage::SHADER_TYPE_3D)); - default_shader_rd = shader.version_get_shader(md->shader_data->version, SHADER_VERSION_COLOR_PASS); - default_shader_sdfgi_rd = shader.version_get_shader(md->shader_data->version, SHADER_VERSION_DEPTH_PASS_WITH_SDF); + default_shader_rd = md->shader_data->get_shader_variant(PIPELINE_VERSION_COLOR_PASS, 0, false); + default_shader_sdfgi_rd = md->shader_data->get_shader_variant(PIPELINE_VERSION_DEPTH_PASS_WITH_SDF, 0, false); default_material_shader_ptr = md->shader_data; default_material_uniform_set = md->uniform_set; @@ -848,19 +862,11 @@ void fragment() { } } -void SceneShaderForwardClustered::set_default_specialization_constants(const Vector &p_constants) { - default_specialization_constants = p_constants; +void SceneShaderForwardClustered::set_default_specialization(const ShaderSpecialization &p_specialization) { + default_specialization = p_specialization; + for (SelfList *E = shader_list.first(); E; E = E->next()) { - for (int i = 0; i < 
ShaderData::CULL_VARIANT_MAX; i++) { - for (int j = 0; j < RS::PRIMITIVE_MAX; j++) { - for (int k = 0; k < SHADER_VERSION_MAX; k++) { - E->self()->pipelines[i][j][k].update_specialization_constants(default_specialization_constants); - } - for (int k = 0; k < PIPELINE_COLOR_PASS_FLAG_COUNT; k++) { - E->self()->color_pipelines[i][j][k].update_specialization_constants(default_specialization_constants); - } - } - } + E->self()->pipeline_hash_map.clear_pipelines(); } } @@ -870,3 +876,20 @@ void SceneShaderForwardClustered::enable_advanced_shader_group(bool p_needs_mult } shader.enable_group(SHADER_GROUP_ADVANCED); } + +bool SceneShaderForwardClustered::is_multiview_shader_group_enabled() const { + return shader.is_group_enabled(SHADER_GROUP_MULTIVIEW); +} + +bool SceneShaderForwardClustered::is_advanced_shader_group_enabled(bool p_multiview) const { + if (p_multiview) { + return shader.is_group_enabled(SHADER_GROUP_ADVANCED_MULTIVIEW); + } else { + return shader.is_group_enabled(SHADER_GROUP_ADVANCED); + } +} + +uint32_t SceneShaderForwardClustered::get_pipeline_compilations(RS::PipelineSource p_source) { + MutexLock lock(SceneShaderForwardClustered::singleton_mutex); + return pipeline_compilations[p_source]; +} diff --git a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h index d5332032f9f0..136514588ac2 100644 --- a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h +++ b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h @@ -31,6 +31,7 @@ #ifndef SCENE_SHADER_FORWARD_CLUSTERED_H #define SCENE_SHADER_FORWARD_CLUSTERED_H +#include "servers/rendering/renderer_rd/pipeline_hash_map_rd.h" #include "servers/rendering/renderer_rd/renderer_scene_render_rd.h" #include "servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl.gen.h" @@ -39,6 +40,7 @@ namespace 
RendererSceneRenderImplementation { class SceneShaderForwardClustered { private: static SceneShaderForwardClustered *singleton; + static Mutex singleton_mutex; public: enum ShaderGroup { @@ -53,21 +55,22 @@ class SceneShaderForwardClustered { SHADER_VERSION_DEPTH_PASS_DP, SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS, SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI, - SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL, - SHADER_VERSION_DEPTH_PASS_WITH_SDF, SHADER_VERSION_DEPTH_PASS_MULTIVIEW, SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW, SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW, + SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL, + SHADER_VERSION_DEPTH_PASS_WITH_SDF, SHADER_VERSION_COLOR_PASS, SHADER_VERSION_MAX }; enum ShaderColorPassFlags { - SHADER_COLOR_PASS_FLAG_SEPARATE_SPECULAR = 1 << 0, - SHADER_COLOR_PASS_FLAG_LIGHTMAP = 1 << 1, - SHADER_COLOR_PASS_FLAG_MULTIVIEW = 1 << 2, - SHADER_COLOR_PASS_FLAG_MOTION_VECTORS = 1 << 3, - SHADER_COLOR_PASS_FLAG_COUNT = 1 << 4 + SHADER_COLOR_PASS_FLAG_UBERSHADER = 1 << 0, + SHADER_COLOR_PASS_FLAG_SEPARATE_SPECULAR = 1 << 1, + SHADER_COLOR_PASS_FLAG_LIGHTMAP = 1 << 2, + SHADER_COLOR_PASS_FLAG_MULTIVIEW = 1 << 3, + SHADER_COLOR_PASS_FLAG_MOTION_VECTORS = 1 << 4, + SHADER_COLOR_PASS_FLAG_COUNT = 1 << 5 }; enum PipelineVersion { @@ -90,26 +93,45 @@ class SceneShaderForwardClustered { PIPELINE_COLOR_PASS_FLAG_LIGHTMAP = 1 << 2, PIPELINE_COLOR_PASS_FLAG_MULTIVIEW = 1 << 3, PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS = 1 << 4, - PIPELINE_COLOR_PASS_FLAG_COUNT = 1 << 5, + PIPELINE_COLOR_PASS_FLAG_OPTIONS = 5, + PIPELINE_COLOR_PASS_FLAG_COMBINATIONS = 1 << PIPELINE_COLOR_PASS_FLAG_OPTIONS, }; - enum ShaderSpecializations { - SHADER_SPECIALIZATION_FORWARD_GI = 1 << 0, - SHADER_SPECIALIZATION_PROJECTOR = 1 << 1, - SHADER_SPECIALIZATION_SOFT_SHADOWS = 1 << 2, - SHADER_SPECIALIZATION_DIRECTIONAL_SOFT_SHADOWS = 1 << 3, + struct ShaderSpecialization { + union { + struct { + uint32_t 
use_forward_gi : 1; + uint32_t use_light_projector : 1; + uint32_t use_light_soft_shadows : 1; + uint32_t use_directional_soft_shadows : 1; + uint32_t decal_use_mipmaps : 1; + uint32_t projector_use_mipmaps : 1; + uint32_t use_depth_fog : 1; + uint32_t use_lightmap_bicubic_filter : 1; + uint32_t soft_shadow_samples : 4; + uint32_t penumbra_shadow_samples : 4; + uint32_t directional_soft_shadow_samples : 4; + uint32_t directional_penumbra_shadow_samples : 4; + }; + + uint32_t packed_0; + }; + + uint32_t packed_1; + uint32_t packed_2; }; - struct ShaderData : public RendererRD::MaterialStorage::ShaderData { - enum BlendMode { //used internally - BLEND_MODE_MIX, - BLEND_MODE_ADD, - BLEND_MODE_SUB, - BLEND_MODE_MUL, - BLEND_MODE_ALPHA_TO_COVERAGE, - BLEND_MODE_PREMULT_ALPHA, + struct UbershaderConstants { + union { + struct { + uint32_t cull_mode : 2; + }; + + uint32_t packed_0; }; + }; + struct ShaderData : public RendererRD::MaterialStorage::ShaderData { enum DepthDraw { DEPTH_DRAW_DISABLED, DEPTH_DRAW_OPAQUE, @@ -141,11 +163,40 @@ class SceneShaderForwardClustered { ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE_AND_TO_ONE }; - bool valid = false; + struct PipelineKey { + RD::VertexFormatID vertex_format_id; + RD::FramebufferFormatID framebuffer_format_id; + RD::PolygonCullMode cull_mode = RD::POLYGON_CULL_MAX; + RS::PrimitiveType primitive_type = RS::PRIMITIVE_MAX; + PipelineVersion version = PipelineVersion::PIPELINE_VERSION_MAX; + uint32_t color_pass_flags = 0; + ShaderSpecialization shader_specialization = {}; + uint32_t wireframe = false; + uint32_t ubershader = false; + + uint32_t hash() const { + uint32_t h = hash_murmur3_one_64(vertex_format_id); + h = hash_murmur3_one_32(framebuffer_format_id, h); + h = hash_murmur3_one_32(cull_mode, h); + h = hash_murmur3_one_32(primitive_type, h); + h = hash_murmur3_one_32(version, h); + h = hash_murmur3_one_32(color_pass_flags, h); + h = hash_murmur3_one_32(shader_specialization.packed_0, h); + h = 
hash_murmur3_one_32(shader_specialization.packed_1, h); + h = hash_murmur3_one_32(shader_specialization.packed_2, h); + h = hash_murmur3_one_32(wireframe, h); + h = hash_murmur3_one_32(ubershader, h); + return hash_fmix32(h); + } + }; + + void _create_pipeline(PipelineKey p_pipeline_key); + PipelineHashMapRD pipeline_hash_map; + RID version; - uint64_t vertex_input_mask = 0; - PipelineCacheRD pipelines[CULL_VARIANT_MAX][RS::PRIMITIVE_MAX][PIPELINE_VERSION_MAX]; - PipelineCacheRD color_pipelines[CULL_VARIANT_MAX][RS::PRIMITIVE_MAX][PIPELINE_COLOR_PASS_FLAG_COUNT]; + + static const uint32_t VERTEX_INPUT_MASKS_SIZE = SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL + SHADER_VERSION_COLOR_PASS + SHADER_COLOR_PASS_FLAG_COUNT; + std::atomic vertex_input_masks[VERTEX_INPUT_MASKS_SIZE] = {}; Vector texture_uniforms; @@ -157,6 +208,10 @@ class SceneShaderForwardClustered { DepthDraw depth_draw = DEPTH_DRAW_OPAQUE; DepthTest depth_test = DEPTH_TEST_ENABLED; + int blend_mode = BLEND_MODE_MIX; + int depth_testi = DEPTH_TEST_ENABLED; + int alpha_antialiasing_mode = ALPHA_ANTIALIASING_OFF; + bool uses_point_size = false; bool uses_alpha = false; bool uses_blend_alpha = false; @@ -168,6 +223,8 @@ class SceneShaderForwardClustered { bool uses_normal = false; bool uses_tangent = false; bool uses_particle_trails = false; + bool uses_normal_map = false; + bool wireframe = false; bool unshaded = false; bool uses_vertex = false; @@ -188,11 +245,39 @@ class SceneShaderForwardClustered { uint64_t last_pass = 0; uint32_t index = 0; + _FORCE_INLINE_ bool uses_alpha_pass() const { + bool has_read_screen_alpha = uses_screen_texture || uses_depth_texture || uses_normal_texture; + bool has_base_alpha = (uses_alpha && (!uses_alpha_clip || uses_alpha_antialiasing)) || has_read_screen_alpha; + bool has_blend_alpha = uses_blend_alpha; + bool has_alpha = has_base_alpha || has_blend_alpha; + bool no_depth_draw = depth_draw == DEPTH_DRAW_DISABLED; + bool no_depth_test = depth_test == DEPTH_TEST_DISABLED; + 
return has_alpha || has_read_screen_alpha || no_depth_draw || no_depth_test; + } + + _FORCE_INLINE_ bool uses_depth_in_alpha_pass() const { + bool no_depth_draw = depth_draw == DEPTH_DRAW_DISABLED; + bool no_depth_test = depth_test == DEPTH_TEST_DISABLED; + return (uses_depth_prepass_alpha || uses_alpha_antialiasing) && !(no_depth_draw || no_depth_test); + } + + _FORCE_INLINE_ bool uses_shared_shadow_material() const { + bool backface_culling = cull_mode == CULL_BACK; + return !uses_particle_trails && !writes_modelview_or_projection && !uses_vertex && !uses_position && !uses_discard && !uses_depth_prepass_alpha && !uses_alpha_clip && !uses_alpha_antialiasing && backface_culling && !uses_point_size && !uses_world_coordinates; + } + virtual void set_code(const String &p_Code); virtual bool is_animated() const; virtual bool casts_shadows() const; virtual RS::ShaderNativeSourceCode get_native_source_code() const; + ShaderVersion _get_shader_version(PipelineVersion p_pipeline_version, uint32_t p_color_pass_flags, bool p_ubershader) const; + RID _get_shader_variant(ShaderVersion p_shader_version) const; + void _clear_vertex_input_mask_cache(); + RID get_shader_variant(PipelineVersion p_pipeline_version, uint32_t p_color_pass_flags, bool p_ubershader) const; + uint64_t get_vertex_input_mask(PipelineVersion p_pipeline_version, uint32_t p_color_pass_flags, bool p_ubershader); + RD::PolygonCullMode get_cull_mode_from_cull_variant(CullVariant p_cull_variant); + bool is_valid() const; SelfList shader_list_element; ShaderData(); @@ -250,14 +335,19 @@ class SceneShaderForwardClustered { RID debug_shadow_splits_material_uniform_set; ShaderData *debug_shadow_splits_material_shader_ptr = nullptr; - Vector default_specialization_constants; - bool valid_color_pass_pipelines[PIPELINE_COLOR_PASS_FLAG_COUNT]; + ShaderSpecialization default_specialization = {}; + + uint32_t pipeline_compilations[RS::PIPELINE_SOURCE_MAX] = {}; + SceneShaderForwardClustered(); 
~SceneShaderForwardClustered(); void init(const String p_defines); - void set_default_specialization_constants(const Vector &p_constants); + void set_default_specialization(const ShaderSpecialization &p_specialization); void enable_advanced_shader_group(bool p_needs_multiview = false); + bool is_multiview_shader_group_enabled() const; + bool is_advanced_shader_group_enabled(bool p_multiview) const; + uint32_t get_pipeline_compilations(RS::PipelineSource p_source); }; } // namespace RendererSceneRenderImplementation diff --git a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp index 8a02ec0eb569..d3002b74a283 100644 --- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp @@ -38,6 +38,8 @@ #include "servers/rendering/rendering_device.h" #include "servers/rendering/rendering_server_default.h" +#define PRELOAD_PIPELINES_ON_SURFACE_CACHE_CONSTRUCTION 1 + using namespace RendererSceneRenderImplementation; RendererRD::ForwardID RenderForwardMobile::ForwardIDStorageMobile::allocate_forward_id(RendererRD::ForwardIDType p_type) { @@ -277,6 +279,66 @@ void RenderForwardMobile::setup_render_buffer_data(Ref p_r p_render_buffers->set_custom_data(RB_SCOPE_MOBILE, data); } +void RenderForwardMobile::mesh_generate_pipelines(RID p_mesh, bool p_background_compilation) { + RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); + RendererRD::MeshStorage *mesh_storage = RendererRD::MeshStorage::get_singleton(); + RID shadow_mesh = mesh_storage->mesh_get_shadow_mesh(p_mesh); + uint32_t surface_count = 0; + const RID *materials = mesh_storage->mesh_get_surface_count_and_materials(p_mesh, surface_count); + Vector pipeline_pairs; + for (uint32_t i = 0; i < surface_count; i++) { + if (materials[i].is_null()) { + continue; + } + + void *mesh_surface = 
mesh_storage->mesh_get_surface(p_mesh, i); + void *mesh_surface_shadow = mesh_surface; + SceneShaderForwardMobile::MaterialData *material = static_cast(material_storage->material_get_data(materials[i], RendererRD::MaterialStorage::SHADER_TYPE_3D)); + if (material == nullptr) { + continue; + } + + SceneShaderForwardMobile::ShaderData *shader = material->shader_data; + SceneShaderForwardMobile::ShaderData *shader_shadow = shader; + if (material->shader_data->uses_shared_shadow_material()) { + SceneShaderForwardMobile::MaterialData *material_shadow = static_cast(material_storage->material_get_data(scene_shader.default_material, RendererRD::MaterialStorage::SHADER_TYPE_3D)); + if (material_shadow != nullptr) { + shader_shadow = material_shadow->shader_data; + if (shadow_mesh.is_valid()) { + mesh_surface_shadow = mesh_storage->mesh_get_surface(shadow_mesh, i); + } + } + } + + if (!shader->is_valid()) { + continue; + } + + SurfacePipelineData surface; + surface.mesh_surface = mesh_surface; + surface.mesh_surface_shadow = mesh_surface_shadow; + surface.shader = shader; + surface.shader_shadow = shader_shadow; + surface.instanced = mesh_storage->mesh_needs_instance(p_mesh, true); + surface.uses_opaque = !material->shader_data->uses_alpha_pass(); + surface.uses_transparent = material->shader_data->uses_alpha_pass(); + surface.uses_depth = surface.uses_opaque || (surface.uses_transparent && material->shader_data->uses_depth_in_alpha_pass()); + surface.can_use_lightmap = mesh_storage->mesh_surface_get_format(mesh_surface) & RS::ARRAY_FORMAT_TEX_UV2; + _mesh_compile_pipelines_for_surface(surface, global_pipeline_data_required, RS::PIPELINE_SOURCE_MESH, &pipeline_pairs); + } + + // Try to retrieve all the pipeline pairs that were compiled. This will force the loader to wait on all ubershader pipelines to be ready. 
+ if (!p_background_compilation && !pipeline_pairs.is_empty()) { + for (ShaderPipelinePair pair : pipeline_pairs) { + pair.first->pipeline_hash_map.get_pipeline(pair.second, pair.second.hash(), true, RS::PIPELINE_SOURCE_MESH); + } + } +} + +uint32_t RenderForwardMobile::get_pipeline_compilations(RS::PipelineSource p_source) { + return scene_shader.get_pipeline_compilations(p_source); +} + bool RenderForwardMobile::free(RID p_rid) { if (RendererSceneRenderRD::free(p_rid)) { return true; @@ -284,6 +346,12 @@ bool RenderForwardMobile::free(RID p_rid) { return false; } +void RenderForwardMobile::update() { + RendererSceneRenderRD::update(); + _update_global_pipeline_data_requirements_from_project(); + _update_global_pipeline_data_requirements_from_light_storage(); +} + /* Render functions */ float RenderForwardMobile::_render_buffers_get_luminance_multiplier() { @@ -750,13 +818,6 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color p_render_data->directional_light_count = directional_light_count; - // fill our render lists early so we can find out if we use various features - _fill_render_list(RENDER_LIST_OPAQUE, p_render_data, PASS_MODE_COLOR); - render_list[RENDER_LIST_OPAQUE].sort_by_key(); - render_list[RENDER_LIST_ALPHA].sort_by_reverse_depth_and_priority(); - _fill_instance_data(RENDER_LIST_OPAQUE); - _fill_instance_data(RENDER_LIST_ALPHA); - if (p_render_data->render_info) { p_render_data->render_info->info[RS::VIEWPORT_RENDER_INFO_TYPE_VISIBLE][RS::VIEWPORT_RENDER_INFO_DRAW_CALLS_IN_FRAME] = p_render_data->instances->size(); p_render_data->render_info->info[RS::VIEWPORT_RENDER_INFO_TYPE_VISIBLE][RS::VIEWPORT_RENDER_INFO_OBJECTS_IN_FRAME] = p_render_data->instances->size(); @@ -777,6 +838,9 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color merge_transparent_pass = true; // we ignore our screen/depth texture here using_subpass_post_process = false; // not applicable at all for reflection probes. 
samplers = RendererRD::MaterialStorage::get_singleton()->samplers_rd_get_default(); + + // Indicate pipelines for reflection probes are required. + global_pipeline_data_required.use_reflection_probes = true; } else if (rb_data.is_valid()) { // setup rendering to render buffer screen_size = p_render_data->render_buffers->get_internal_size(); @@ -807,6 +871,21 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color ERR_FAIL(); //bug? } + if (p_render_data->scene_data->view_count > 1) { + global_pipeline_data_required.use_multiview = true; + } + + if (scene_state.used_lightmap) { + global_pipeline_data_required.use_lightmaps = true; + } + + // fill our render lists early so we can find out if we use various features + _fill_render_list(RENDER_LIST_OPAQUE, p_render_data, PASS_MODE_COLOR); + render_list[RENDER_LIST_OPAQUE].sort_by_key(); + render_list[RENDER_LIST_ALPHA].sort_by_reverse_depth_and_priority(); + _fill_instance_data(RENDER_LIST_OPAQUE); + _fill_instance_data(RENDER_LIST_ALPHA); + p_render_data->scene_data->emissive_exposure_normalization = -1.0; RD::get_singleton()->draw_command_begin_label("Render Setup"); @@ -924,25 +1003,23 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color _pre_opaque_render(p_render_data); - uint32_t spec_constant_base_flags = 0; + SceneShaderForwardMobile::ShaderSpecialization base_specialization = scene_shader.default_specialization; { //figure out spec constants if (p_render_data->directional_light_count > 0) { - if (p_render_data->directional_light_soft_shadows) { - spec_constant_base_flags |= 1 << SPEC_CONSTANT_USING_DIRECTIONAL_SOFT_SHADOWS; - } + base_specialization.use_directional_soft_shadows = p_render_data->directional_light_soft_shadows; } else { - spec_constant_base_flags |= 1 << SPEC_CONSTANT_DISABLE_DIRECTIONAL_LIGHTS; + base_specialization.disable_directional_lights = true; } if (!is_environment(p_render_data->environment) || 
!environment_get_fog_enabled(p_render_data->environment)) { - spec_constant_base_flags |= 1 << SPEC_CONSTANT_DISABLE_FOG; + base_specialization.disable_fog = true; } if (p_render_data->environment.is_valid() && environment_get_fog_mode(p_render_data->environment) == RS::EnvironmentFogMode::ENV_FOG_MODE_DEPTH) { - spec_constant_base_flags |= 1 << SPEC_CONSTANT_USE_DEPTH_FOG; + base_specialization.use_depth_fog = true; } } @@ -1010,7 +1087,7 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color } if (render_list[RENDER_LIST_OPAQUE].elements.size() > 0) { - RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), reverse_cull, PASS_MODE_COLOR, rp_uniform_set, spec_constant_base_flags, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count); + RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), reverse_cull, PASS_MODE_COLOR, rp_uniform_set, base_specialization, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count); render_list_params.framebuffer_format = fb_format; render_list_params.subpass = RD::get_singleton()->draw_list_get_current_pass(); // Should now always be 0. 
@@ -1037,7 +1114,7 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_ALPHA, p_render_data, radiance_texture, samplers, true); - RenderListParameters render_list_params(render_list[RENDER_LIST_ALPHA].elements.ptr(), render_list[RENDER_LIST_ALPHA].element_info.ptr(), render_list[RENDER_LIST_ALPHA].elements.size(), reverse_cull, PASS_MODE_COLOR_TRANSPARENT, rp_uniform_set, spec_constant_base_flags, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count); + RenderListParameters render_list_params(render_list[RENDER_LIST_ALPHA].elements.ptr(), render_list[RENDER_LIST_ALPHA].element_info.ptr(), render_list[RENDER_LIST_ALPHA].elements.size(), reverse_cull, PASS_MODE_COLOR_TRANSPARENT, rp_uniform_set, base_specialization, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count); render_list_params.framebuffer_format = fb_format; render_list_params.subpass = RD::get_singleton()->draw_list_get_current_pass(); // Should now always be 0. 
@@ -1088,7 +1165,7 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color // this may be needed if we re-introduced steps that change info, not sure which do so in the previous implementation //_setup_environment(p_render_data, is_reflection_probe, screen_size, p_default_bg_color, false); - RenderListParameters render_list_params(render_list[RENDER_LIST_ALPHA].elements.ptr(), render_list[RENDER_LIST_ALPHA].element_info.ptr(), render_list[RENDER_LIST_ALPHA].elements.size(), reverse_cull, PASS_MODE_COLOR, rp_uniform_set, spec_constant_base_flags, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count); + RenderListParameters render_list_params(render_list[RENDER_LIST_ALPHA].elements.ptr(), render_list[RENDER_LIST_ALPHA].element_info.ptr(), render_list[RENDER_LIST_ALPHA].elements.size(), reverse_cull, PASS_MODE_COLOR, rp_uniform_set, base_specialization, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count); render_list_params.framebuffer_format = fb_format; render_list_params.subpass = RD::get_singleton()->draw_list_get_current_pass(); // Should now always be 0. 
@@ -1395,7 +1472,7 @@ void RenderForwardMobile::_render_shadow_end() { RD::get_singleton()->draw_command_begin_label("Shadow Render"); for (SceneState::ShadowPass &shadow_pass : scene_state.shadow_passes) { - RenderListParameters render_list_parameters(render_list[RENDER_LIST_SECONDARY].elements.ptr() + shadow_pass.element_from, render_list[RENDER_LIST_SECONDARY].element_info.ptr() + shadow_pass.element_from, shadow_pass.element_count, shadow_pass.flip_cull, shadow_pass.pass_mode, shadow_pass.rp_uniform_set, 0, false, Vector2(), shadow_pass.lod_distance_multiplier, shadow_pass.screen_mesh_lod_threshold, 1, shadow_pass.element_from); + RenderListParameters render_list_parameters(render_list[RENDER_LIST_SECONDARY].elements.ptr() + shadow_pass.element_from, render_list[RENDER_LIST_SECONDARY].element_info.ptr() + shadow_pass.element_from, shadow_pass.element_count, shadow_pass.flip_cull, shadow_pass.pass_mode, shadow_pass.rp_uniform_set, scene_shader.default_specialization, false, Vector2(), shadow_pass.lod_distance_multiplier, shadow_pass.screen_mesh_lod_threshold, 1, shadow_pass.element_from); _render_list_with_draw_list(&render_list_parameters, shadow_pass.framebuffer, RD::INITIAL_ACTION_DISCARD, RD::FINAL_ACTION_DISCARD, shadow_pass.initial_depth_action, RD::FINAL_ACTION_STORE, Vector(), 0.0, 0, shadow_pass.rect); } @@ -1439,7 +1516,7 @@ void RenderForwardMobile::_render_material(const Transform3D &p_cam_transform, c RENDER_TIMESTAMP("Render 3D Material"); { - RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, rp_uniform_set, 0); + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, rp_uniform_set, scene_shader.default_specialization); 
//regular forward for now Vector clear = { Color(0, 0, 0, 0), @@ -1484,7 +1561,7 @@ void RenderForwardMobile::_render_uv2(const PagedArray RENDER_TIMESTAMP("Render 3D Material"); { - RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, rp_uniform_set, true, false); + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), true, pass_mode, rp_uniform_set, scene_shader.default_specialization, false); //regular forward for now Vector clear = { Color(0, 0, 0, 0), @@ -1568,7 +1645,7 @@ void RenderForwardMobile::_render_particle_collider_heightfield(RID p_fb, const { //regular forward for now - RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), false, pass_mode, rp_uniform_set, 0); + RenderListParameters render_list_params(render_list[RENDER_LIST_SECONDARY].elements.ptr(), render_list[RENDER_LIST_SECONDARY].element_info.ptr(), render_list[RENDER_LIST_SECONDARY].elements.size(), false, pass_mode, rp_uniform_set, scene_shader.default_specialization); _render_list_with_draw_list(&render_list_params, p_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_STORE); } RD::get_singleton()->draw_command_end_label(); @@ -1795,6 +1872,7 @@ void RenderForwardMobile::_fill_render_list(RenderListType p_render_list, const scene_state.used_screen_texture = false; scene_state.used_normal_texture = false; scene_state.used_depth_texture = false; + scene_state.used_lightmap = false; } uint32_t lightmap_captures_used = 0; @@ -1943,6 +2021,7 @@ void RenderForwardMobile::_fill_render_list(RenderListType 
p_render_list, const if (uses_lightmap) { surf->sort.uses_lightmap = 1; // This needs to become our lightmap index but we'll do that in a separate PR. + scene_state.used_lightmap = true; } if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_SUBSURFACE_SCATTERING) { @@ -2036,14 +2115,21 @@ void RenderForwardMobile::_render_list_template(RenderingDevice::DrawListID p_dr RD::get_singleton()->draw_list_bind_uniform_set(draw_list, p_params->render_pass_uniform_set, RENDER_PASS_UNIFORM_SET); RD::get_singleton()->draw_list_bind_uniform_set(draw_list, scene_shader.default_vec4_xform_uniform_set, TRANSFORMS_UNIFORM_SET); + RID material_uniform_set; RID prev_material_uniform_set; RID prev_vertex_array_rd; RID prev_index_array_rd; - RID prev_pipeline_rd; RID prev_xforms_uniform_set; bool should_request_redraw = false; + void *mesh_surface; + SceneShaderForwardMobile::ShaderData *shader = nullptr; + SceneShaderForwardMobile::ShaderData *prev_shader = nullptr; + SceneShaderForwardMobile::ShaderData::PipelineKey pipeline_key; + uint32_t pipeline_hash = 0; + uint32_t prev_pipeline_hash = 0; + bool shadow_pass = (p_params->pass_mode == PASS_MODE_SHADOW) || (p_params->pass_mode == PASS_MODE_SHADOW_DP); for (uint32_t i = p_from_element; i < p_to_element; i++) { @@ -2055,11 +2141,8 @@ void RenderForwardMobile::_render_list_template(RenderingDevice::DrawListID p_dr continue; } - uint32_t base_spec_constants = p_params->spec_constant_base_flags; - - if (bool(inst->flags_cache & INSTANCE_DATA_FLAG_MULTIMESH)) { - base_spec_constants |= 1 << SPEC_CONSTANT_IS_MULTIMESH; - } + SceneShaderForwardMobile::ShaderSpecialization pipeline_specialization = p_params->base_specialization; + pipeline_specialization.is_multimesh = bool(inst->flags_cache & INSTANCE_DATA_FLAG_MULTIMESH); SceneState::PushConstant push_constant; push_constant.base_index = i + p_params->element_offset; @@ -2072,35 +2155,18 @@ void RenderForwardMobile::_render_list_template(RenderingDevice::DrawListID p_dr 
push_constant.uv_offset[1] = 0.0; } - RID material_uniform_set; - SceneShaderForwardMobile::ShaderData *shader; - void *mesh_surface; - if (shadow_pass) { material_uniform_set = surf->material_uniform_set_shadow; shader = surf->shader_shadow; mesh_surface = surf->surface_shadow; } else { - if (inst->use_projector) { - base_spec_constants |= 1 << SPEC_CONSTANT_USING_PROJECTOR; - } - if (inst->use_soft_shadow) { - base_spec_constants |= 1 << SPEC_CONSTANT_USING_SOFT_SHADOWS; - } - - if (inst->omni_light_count == 0) { - base_spec_constants |= 1 << SPEC_CONSTANT_DISABLE_OMNI_LIGHTS; - } - if (inst->spot_light_count == 0) { - base_spec_constants |= 1 << SPEC_CONSTANT_DISABLE_SPOT_LIGHTS; - } - if (inst->reflection_probe_count == 0) { - base_spec_constants |= 1 << SPEC_CONSTANT_DISABLE_REFLECTION_PROBES; - } - if (inst->decals_count == 0) { - base_spec_constants |= 1 << SPEC_CONSTANT_DISABLE_DECALS; - } + pipeline_specialization.use_light_projector = inst->use_projector; + pipeline_specialization.use_light_soft_shadows = inst->use_soft_shadow; + pipeline_specialization.disable_omni_lights = inst->omni_light_count == 0; + pipeline_specialization.disable_spot_lights = inst->spot_light_count == 0; + pipeline_specialization.disable_reflection_probes = inst->reflection_probe_count == 0; + pipeline_specialization.disable_decals = inst->decals_count == 0; #ifdef DEBUG_ENABLED if (unlikely(get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_LIGHTING)) { @@ -2145,44 +2211,79 @@ void RenderForwardMobile::_render_list_template(RenderingDevice::DrawListID p_dr cull_variant = mirror ? 
SceneShaderForwardMobile::ShaderData::CULL_VARIANT_REVERSED : SceneShaderForwardMobile::ShaderData::CULL_VARIANT_NORMAL; } - RS::PrimitiveType primitive = surf->primitive; + pipeline_key.primitive_type = surf->primitive; RID xforms_uniform_set = surf->owner->transforms_uniform_set; - SceneShaderForwardMobile::ShaderVersion shader_version = SceneShaderForwardMobile::SHADER_VERSION_MAX; // Assigned to silence wrong -Wmaybe-initialized. - switch (p_params->pass_mode) { case PASS_MODE_COLOR: case PASS_MODE_COLOR_TRANSPARENT: { if (element_info.uses_lightmap) { - shader_version = p_params->view_count > 1 ? SceneShaderForwardMobile::SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW : SceneShaderForwardMobile::SHADER_VERSION_LIGHTMAP_COLOR_PASS; + pipeline_key.version = p_params->view_count > 1 ? SceneShaderForwardMobile::SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW : SceneShaderForwardMobile::SHADER_VERSION_LIGHTMAP_COLOR_PASS; } else { - shader_version = p_params->view_count > 1 ? SceneShaderForwardMobile::SHADER_VERSION_COLOR_PASS_MULTIVIEW : SceneShaderForwardMobile::SHADER_VERSION_COLOR_PASS; + pipeline_key.version = p_params->view_count > 1 ? SceneShaderForwardMobile::SHADER_VERSION_COLOR_PASS_MULTIVIEW : SceneShaderForwardMobile::SHADER_VERSION_COLOR_PASS; } } break; case PASS_MODE_SHADOW: { - shader_version = p_params->view_count > 1 ? SceneShaderForwardMobile::SHADER_VERSION_SHADOW_PASS_MULTIVIEW : SceneShaderForwardMobile::SHADER_VERSION_SHADOW_PASS; + pipeline_key.version = p_params->view_count > 1 ? 
SceneShaderForwardMobile::SHADER_VERSION_SHADOW_PASS_MULTIVIEW : SceneShaderForwardMobile::SHADER_VERSION_SHADOW_PASS; } break; case PASS_MODE_SHADOW_DP: { ERR_FAIL_COND_MSG(p_params->view_count > 1, "Multiview not supported for shadow DP pass"); - shader_version = SceneShaderForwardMobile::SHADER_VERSION_SHADOW_PASS_DP; + pipeline_key.version = SceneShaderForwardMobile::SHADER_VERSION_SHADOW_PASS_DP; } break; case PASS_MODE_DEPTH_MATERIAL: { ERR_FAIL_COND_MSG(p_params->view_count > 1, "Multiview not supported for material pass"); - shader_version = SceneShaderForwardMobile::SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL; + pipeline_key.version = SceneShaderForwardMobile::SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL; } break; } - PipelineCacheRD *pipeline = &shader->pipelines[cull_variant][primitive][shader_version]; + pipeline_key.framebuffer_format_id = framebuffer_format; + pipeline_key.wireframe = p_params->force_wireframe; + pipeline_key.render_pass = p_params->subpass; + pipeline_key.ubershader = 0; + const RD::PolygonCullMode cull_mode = shader->get_cull_mode_from_cull_variant(cull_variant); RD::VertexFormatID vertex_format = -1; + RID pipeline_rd; RID vertex_array_rd; RID index_array_rd; + const uint32_t ubershader_iterations = 2; + while (pipeline_key.ubershader < ubershader_iterations) { + // Skeleton and blend shape. 
+ uint64_t input_mask = shader->get_vertex_input_mask(pipeline_key.version, pipeline_key.ubershader); + if (surf->owner->mesh_instance.is_valid()) { + mesh_storage->mesh_instance_surface_get_vertex_arrays_and_format(surf->owner->mesh_instance, surf->surface_index, input_mask, false, vertex_array_rd, vertex_format); + } else { + mesh_storage->mesh_surface_get_vertex_arrays_and_format(mesh_surface, input_mask, false, vertex_array_rd, vertex_format); + } - //skeleton and blend shape - if (surf->owner->mesh_instance.is_valid()) { - mesh_storage->mesh_instance_surface_get_vertex_arrays_and_format(surf->owner->mesh_instance, surf->surface_index, pipeline->get_vertex_input_mask(), false, vertex_array_rd, vertex_format); - } else { - mesh_storage->mesh_surface_get_vertex_arrays_and_format(mesh_surface, pipeline->get_vertex_input_mask(), false, vertex_array_rd, vertex_format); + index_array_rd = mesh_storage->mesh_surface_get_index_array(mesh_surface, element_info.lod_index); + pipeline_key.vertex_format_id = vertex_format; + + if (pipeline_key.ubershader) { + pipeline_key.shader_specialization = {}; + pipeline_key.cull_mode = RD::POLYGON_CULL_DISABLED; + } else { + pipeline_key.shader_specialization = pipeline_specialization; + pipeline_key.cull_mode = cull_mode; + } + + pipeline_hash = pipeline_key.hash(); + + if (shader != prev_shader || pipeline_hash != prev_pipeline_hash) { + RS::PipelineSource pipeline_source = pipeline_key.ubershader ? RS::PIPELINE_SOURCE_DRAW : RS::PIPELINE_SOURCE_SPECIALIZATION; + pipeline_rd = shader->pipeline_hash_map.get_pipeline(pipeline_key, pipeline_hash, pipeline_key.ubershader, pipeline_source); + + if (pipeline_rd.is_valid()) { + prev_shader = shader; + prev_pipeline_hash = pipeline_hash; + break; + } else { + pipeline_key.ubershader++; + } + } else { + // The same pipeline is bound already. 
+ break; + } } index_array_rd = mesh_storage->mesh_surface_get_index_array(mesh_surface, element_info.lod_index); @@ -2199,11 +2300,8 @@ void RenderForwardMobile::_render_list_template(RenderingDevice::DrawListID p_dr prev_index_array_rd = index_array_rd; } - RID pipeline_rd = pipeline->get_render_pipeline(vertex_format, framebuffer_format, p_params->force_wireframe, p_params->subpass, base_spec_constants); - - if (pipeline_rd != prev_pipeline_rd) { + if (!pipeline_rd.is_null()) { RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, pipeline_rd); - prev_pipeline_rd = pipeline_rd; } if (xforms_uniform_set.is_valid() && prev_xforms_uniform_set != xforms_uniform_set) { @@ -2220,7 +2318,17 @@ void RenderForwardMobile::_render_list_template(RenderingDevice::DrawListID p_dr prev_material_uniform_set = material_uniform_set; } - RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(SceneState::PushConstant)); + size_t push_constant_size = 0; + if (pipeline_key.ubershader) { + push_constant_size = sizeof(SceneState::PushConstant); + push_constant.ubershader.specialization = pipeline_specialization; + push_constant.ubershader.constants = {}; + push_constant.ubershader.constants.cull_mode = cull_mode; + } else { + push_constant_size = sizeof(SceneState::PushConstant) - sizeof(SceneState::PushConstantUbershader); + } + + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, push_constant_size); uint32_t instance_count = surf->owner->instance_count > 1 ? 
surf->owner->instance_count : 1;
 		if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_PARTICLE_TRAILS) {
@@ -2363,14 +2471,25 @@ void RenderForwardMobile::GeometryInstanceForwardMobile::_mark_dirty() {
 	RenderForwardMobile::get_singleton()->geometry_instance_dirty_list.add(&dirty_list_element);
 }
 
-void RenderForwardMobile::_geometry_instance_add_surface_with_material(GeometryInstanceForwardMobile *ginstance, uint32_t p_surface, SceneShaderForwardMobile::MaterialData *p_material, uint32_t p_material_id, uint32_t p_shader_id, RID p_mesh) {
-	RendererRD::MeshStorage *mesh_storage = RendererRD::MeshStorage::get_singleton();
+void RenderForwardMobile::_update_global_pipeline_data_requirements_from_project() { // Refreshes the fields of global_pipeline_data_required that are read from project settings.
+	const int msaa_2d_mode = GLOBAL_GET("rendering/anti_aliasing/quality/msaa_2d");
+	const int msaa_3d_mode = GLOBAL_GET("rendering/anti_aliasing/quality/msaa_3d");
+	const bool directional_shadow_16_bits = GLOBAL_GET("rendering/lights_and_shadows/directional_shadow/16_bits");
+	const bool positional_shadow_16_bits = GLOBAL_GET("rendering/lights_and_shadows/positional_shadow/atlas_16_bits");
+	global_pipeline_data_required.use_16_bit_shadows = directional_shadow_16_bits || positional_shadow_16_bits; // True when at least one of the two shadow settings is 16 bits.
+	global_pipeline_data_required.use_32_bit_shadows = !directional_shadow_16_bits || !positional_shadow_16_bits; // True when at least one of the two shadow settings is not 16 bits.
+	global_pipeline_data_required.target_samples = RenderSceneBuffersRD::msaa_to_samples(RS::ViewportMSAA(msaa_2d_mode)); // Derived from the 2D MSAA setting.
+	global_pipeline_data_required.texture_samples = RenderSceneBuffersRD::msaa_to_samples(RS::ViewportMSAA(msaa_3d_mode)); // Derived from the 3D MSAA setting.
+}
-
-	bool has_read_screen_alpha = p_material->shader_data->uses_screen_texture || p_material->shader_data->uses_depth_texture || p_material->shader_data->uses_normal_texture;
-	bool has_base_alpha = p_material->shader_data->uses_alpha && (!p_material->shader_data->uses_alpha_clip || p_material->shader_data->uses_alpha_antialiasing);
-	bool has_blend_alpha = p_material->shader_data->uses_blend_alpha;
-	bool has_alpha = has_base_alpha || has_blend_alpha || has_read_screen_alpha;
+void RenderForwardMobile::_update_global_pipeline_data_requirements_from_light_storage() { // Copies the shadow cubemap / dual paraboloid usage reported by LightStorage into global_pipeline_data_required.
+	RendererRD::LightStorage *light_storage = RendererRD::LightStorage::get_singleton();
+	global_pipeline_data_required.use_shadow_cubemaps = light_storage->get_shadow_cubemaps_used();
+	global_pipeline_data_required.use_shadow_dual_paraboloid = light_storage->get_shadow_dual_paraboloid_used();
+}
 
+void RenderForwardMobile::_geometry_instance_add_surface_with_material(GeometryInstanceForwardMobile *ginstance, uint32_t p_surface, SceneShaderForwardMobile::MaterialData *p_material, uint32_t p_material_id, uint32_t p_shader_id, RID p_mesh) {
+	RendererRD::MeshStorage *mesh_storage = RendererRD::MeshStorage::get_singleton();
 	uint32_t flags = 0;
 
 	if (p_material->shader_data->uses_sss) {
@@ -2393,10 +2512,9 @@ void RenderForwardMobile::_geometry_instance_add_surface_with_material(GeometryI
 		flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_DOUBLE_SIDED_SHADOWS;
 	}
 
-	if (has_alpha || p_material->shader_data->depth_draw == SceneShaderForwardMobile::ShaderData::DEPTH_DRAW_DISABLED || p_material->shader_data->depth_test == SceneShaderForwardMobile::ShaderData::DEPTH_TEST_DISABLED) {
-		//material is only meant for alpha pass
+	if (p_material->shader_data->uses_alpha_pass()) {
 		flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_ALPHA;
-		if ((p_material->shader_data->uses_depth_prepass_alpha || p_material->shader_data->uses_alpha_antialiasing) && !(p_material->shader_data->depth_draw == SceneShaderForwardMobile::ShaderData::DEPTH_DRAW_DISABLED || p_material->shader_data->depth_test == SceneShaderForwardMobile::ShaderData::DEPTH_TEST_DISABLED)) {
+		if (p_material->shader_data->uses_depth_in_alpha_pass()) {
 			flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH;
 			flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW;
 		}
@@ -2412,7 +2530,7 @@ void RenderForwardMobile::_geometry_instance_add_surface_with_material(GeometryI
SceneShaderForwardMobile::MaterialData *material_shadow = nullptr; void *surface_shadow = nullptr; - if (!p_material->shader_data->uses_particle_trails && !p_material->shader_data->writes_modelview_or_projection && !p_material->shader_data->uses_vertex && !p_material->shader_data->uses_discard && !p_material->shader_data->uses_depth_prepass_alpha && !p_material->shader_data->uses_alpha_clip && !p_material->shader_data->uses_alpha_antialiasing && !p_material->shader_data->uses_world_coordinates) { + if (p_material->shader_data->uses_shared_shadow_material()) { flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_SHARED_SHADOW_MATERIAL; material_shadow = static_cast(RendererRD::MaterialStorage::get_singleton()->material_get_data(scene_shader.default_material, RendererRD::MaterialStorage::SHADER_TYPE_3D)); @@ -2471,6 +2589,16 @@ void RenderForwardMobile::_geometry_instance_add_surface_with_material(GeometryI String mesh_path = mesh_storage->mesh_get_path(p_mesh).is_empty() ? "" : "(" + mesh_storage->mesh_get_path(p_mesh) + ")"; WARN_PRINT_ED(vformat("Attempting to use a shader %s that requires tangents with a mesh %s that doesn't contain tangents. Ensure that meshes are imported with the 'ensure_tangents' option. 
If creating your own meshes, add an `ARRAY_TANGENT` array (when using ArrayMesh) or call `generate_tangents()` (when using SurfaceTool).", shader_path, mesh_path)); } + +#if PRELOAD_PIPELINES_ON_SURFACE_CACHE_CONSTRUCTION + if (!sdcache->compilation_dirty_element.in_list()) { + geometry_surface_compilation_dirty_list.add(&sdcache->compilation_dirty_element); + } + + if (!sdcache->compilation_all_element.in_list()) { + geometry_surface_compilation_all_list.add(&sdcache->compilation_all_element); + } +#endif } void RenderForwardMobile::_geometry_instance_add_surface_with_material_chain(GeometryInstanceForwardMobile *ginstance, uint32_t p_surface, SceneShaderForwardMobile::MaterialData *p_material, RID p_mat_src, RID p_mesh) { @@ -2482,7 +2610,7 @@ void RenderForwardMobile::_geometry_instance_add_surface_with_material_chain(Geo while (material->next_pass.is_valid()) { RID next_pass = material->next_pass; material = static_cast(material_storage->material_get_data(next_pass, RendererRD::MaterialStorage::SHADER_TYPE_3D)); - if (!material || !material->shader_data->valid) { + if (!material || !material->shader_data->is_valid()) { break; } if (ginstance->data->dirty_dependencies) { @@ -2502,7 +2630,7 @@ void RenderForwardMobile::_geometry_instance_add_surface(GeometryInstanceForward if (m_src.is_valid()) { material = static_cast(material_storage->material_get_data(m_src, RendererRD::MaterialStorage::SHADER_TYPE_3D)); - if (!material || !material->shader_data->valid) { + if (!material || !material->shader_data->is_valid()) { material = nullptr; } } @@ -2524,7 +2652,7 @@ void RenderForwardMobile::_geometry_instance_add_surface(GeometryInstanceForward m_src = ginstance->data->material_overlay; material = static_cast(material_storage->material_get_data(m_src, RendererRD::MaterialStorage::SHADER_TYPE_3D)); - if (material && material->shader_data->valid) { + if (material && material->shader_data->is_valid()) { if (ginstance->data->dirty_dependencies) { 
material_storage->material_update_dependency(m_src, &ginstance->data->dependency_tracker);
 		}
@@ -2685,10 +2813,258 @@ void RenderForwardMobile::_geometry_instance_update(RenderGeometryInstance *p_ge
 	ginstance->dirty_list_element.remove_from_list();
 }
 
+// Builds the multipass framebuffer format used as the pipeline key of the color pass. Attachments are added in the order color, depth, VRS (if p_vrs), MSAA resolve (if p_samples > 1) and post pass target (if p_post_pass); the pass indices below rely on that order.
+static RD::FramebufferFormatID _get_color_framebuffer_format_for_pipeline(RD::DataFormat p_color_format, bool p_can_be_storage, RD::TextureSamples p_samples, RD::TextureSamples p_target_samples, bool p_vrs, bool p_post_pass, bool p_hdr, uint32_t p_view_count) {
+	const bool multisampling = p_samples > RD::TEXTURE_SAMPLES_1;
+	RD::AttachmentFormat attachment;
+
+	thread_local Vector<RD::AttachmentFormat> attachments; // Persists between calls on the same thread, hence the clear() below.
+	attachments.clear();
+
+	// Color attachment.
+	attachment.samples = p_samples;
+	attachment.format = p_color_format;
+	attachment.usage_flags = RenderSceneBuffersRD::get_color_usage_bits(false, multisampling, p_can_be_storage);
+	attachments.push_back(attachment);
+
+	// Depth attachment.
+	attachment.samples = p_samples;
+	attachment.format = RenderSceneBuffersRD::get_depth_format(false, multisampling, p_can_be_storage);
+	attachment.usage_flags = RenderSceneBuffersRD::get_depth_usage_bits(false, multisampling, p_can_be_storage);
+	attachments.push_back(attachment);
+
+	if (p_vrs) {
+		// VRS attachment. It has to be added to the list so that index 2 (pass.vrs_attachment below) refers to it.
+		attachment.samples = RD::TEXTURE_SAMPLES_1;
+		attachment.format = RenderSceneBuffersRD::get_vrs_format();
+		attachment.usage_flags = RenderSceneBuffersRD::get_vrs_usage_bits();
+		attachments.push_back(attachment);
+	}
+
+	if (multisampling) {
+		// Resolve attachment.
+		attachment.samples = RD::TEXTURE_SAMPLES_1;
+		attachment.format = p_color_format;
+		attachment.usage_flags = RenderSceneBuffersRD::get_color_usage_bits(true, false, p_can_be_storage);
+		attachments.push_back(attachment);
+	}
+
+	RD::FramebufferPass pass;
+	thread_local Vector<RD::FramebufferPass> passes; // Persists between calls on the same thread, hence the clear() below.
+	passes.clear();
+	pass.color_attachments.clear();
+	pass.color_attachments.push_back(0); // Index of the color attachment pushed first.
+	pass.depth_attachment = 1; // Index of the depth attachment pushed second.
+
+	if (p_vrs) {
+		pass.vrs_attachment = 2; // Index of the VRS attachment pushed right after depth.
+	}
+
+	if (multisampling) {
+		pass.resolve_attachments.push_back(attachments.size() - 1); // The resolve attachment is the last one pushed so far.
+	}
+
+	passes.push_back(pass);
+
+	if (p_post_pass) {
+		attachment.format = RendererRD::TextureStorage::render_target_get_color_format(p_hdr, false);
+
+		if (p_view_count > 1 || p_target_samples == RD::TEXTURE_SAMPLES_1) {
+			attachment.samples = RD::TEXTURE_SAMPLES_1;
+			attachment.usage_flags = RendererRD::TextureStorage::render_target_get_color_usage_bits(false);
+		} else {
+			attachment.samples = p_target_samples;
+			attachment.usage_flags = RendererRD::TextureStorage::render_target_get_color_usage_bits(true);
+		}
+
+		attachments.push_back(attachment);
+
+		RD::FramebufferPass blit_pass;
+		blit_pass.input_attachments.push_back(multisampling ? (attachments.size() - 2) : 0); // Reads the resolve attachment when multisampling, otherwise the color attachment.
+		blit_pass.color_attachments.push_back(attachments.size() - 1); // Writes to the post pass target pushed just above.
+		passes.push_back(blit_pass);
+	}
+
+	return RD::get_singleton()->framebuffer_format_create_multipass(attachments, passes, p_view_count);
+}
+
+static RD::FramebufferFormatID _get_reflection_probe_color_framebuffer_format_for_pipeline() { // Framebuffer format made of the reflection probe color and depth attachments, in that order.
+	RD::AttachmentFormat attachment;
+	thread_local Vector<RD::AttachmentFormat> attachments;
+	attachments.clear();
+
+	attachment.format = RendererRD::LightStorage::get_reflection_probe_color_format();
+	attachment.usage_flags = RendererRD::LightStorage::get_reflection_probe_color_usage_bits();
+	attachments.push_back(attachment);
+
+	attachment.format = RendererRD::LightStorage::get_reflection_probe_depth_format();
+	attachment.usage_flags = RendererRD::LightStorage::get_reflection_probe_depth_usage_bits();
+	attachments.push_back(attachment);
+
+	return RD::get_singleton()->framebuffer_format_create(attachments);
+}
+
+static RD::FramebufferFormatID _get_shadow_cubemap_framebuffer_format_for_pipeline() { // Framebuffer format with the single depth attachment described by the LightStorage cubemap depth getters.
+	thread_local LocalVector<RD::AttachmentFormat> attachments;
+	attachments.clear();
+
+	RD::AttachmentFormat attachment;
+	attachment.format = RendererRD::LightStorage::get_cubemap_depth_format();
+	attachment.usage_flags = RendererRD::LightStorage::get_cubemap_depth_usage_bits();
+	attachments.push_back(attachment);
+
+	return RD::get_singleton()->framebuffer_format_create(attachments);
+}
+
+static RD::FramebufferFormatID _get_shadow_atlas_framebuffer_format_for_pipeline(bool p_use_16_bits) { // Framebuffer format with the single shadow atlas depth attachment; p_use_16_bits is forwarded to the depth format getter.
+	thread_local LocalVector<RD::AttachmentFormat> attachments;
+	attachments.clear();
+
+	RD::AttachmentFormat attachment;
+	attachment.format = RendererRD::LightStorage::get_shadow_atlas_depth_format(p_use_16_bits);
+	attachment.usage_flags = RendererRD::LightStorage::get_shadow_atlas_depth_usage_bits();
+	attachments.push_back(attachment);
+
+	return RD::get_singleton()->framebuffer_format_create(attachments);
+}
+
+void
RenderForwardMobile::_mesh_compile_pipeline_for_surface(SceneShaderForwardMobile::ShaderData *p_shader, void *p_mesh_surface, bool p_instanced_surface, RS::PipelineSource p_source, SceneShaderForwardMobile::ShaderData::PipelineKey &r_pipeline_key, Vector *r_pipeline_pairs) { // Completes r_pipeline_key for one surface and requests its compilation. NOTE(review): the element type of the Vector parameter appears to have been lost in extraction - confirm against the upstream patch.
+	RendererRD::MeshStorage *mesh_storage = RendererRD::MeshStorage::get_singleton();
+	uint64_t input_mask = p_shader->get_vertex_input_mask(r_pipeline_key.version, true);
+	r_pipeline_key.vertex_format_id = mesh_storage->mesh_surface_get_vertex_format(p_mesh_surface, input_mask, p_instanced_surface, false);
+	r_pipeline_key.ubershader = true; // The key is always marked as the ubershader variant here.
+	p_shader->pipeline_hash_map.compile_pipeline(r_pipeline_key, r_pipeline_key.hash(), p_source);
+
+	if (r_pipeline_pairs != nullptr) {
+		r_pipeline_pairs->push_back({ p_shader, r_pipeline_key }); // Optionally report which shader/key pair was requested.
+	}
+}
+
+void RenderForwardMobile::_mesh_compile_pipelines_for_surface(const SurfacePipelineData &p_surface, const GlobalPipelineData &p_global, RS::PipelineSource p_source, Vector *r_pipeline_pairs) { // Requests every pipeline variant the surface may need given the global requirements in p_global. NOTE(review): Vector element type lost in extraction - confirm against the upstream patch.
+	RendererRD::MeshStorage *mesh_storage = RendererRD::MeshStorage::get_singleton();
+
+	// Set the attributes common to all pipelines.
+	SceneShaderForwardMobile::ShaderData::PipelineKey pipeline_key;
+	pipeline_key.cull_mode = RD::POLYGON_CULL_DISABLED;
+	pipeline_key.primitive_type = mesh_storage->mesh_surface_get_primitive(p_surface.mesh_surface);
+	pipeline_key.wireframe = false;
+
+	const bool multiview_enabled = p_global.use_multiview && scene_shader.is_multiview_enabled();
+	const RD::DataFormat buffers_color_format = _render_buffers_get_color_format();
+	const bool buffers_can_be_storage = _render_buffers_can_be_storage();
+	const uint32_t vrs_iterations = is_vrs_supported() ? 2 : 1; // The VRS variant is only enumerated when VRS is supported.
+	for (uint32_t use_vrs = 0; use_vrs < vrs_iterations; use_vrs++) {
+		for (uint32_t use_post_pass = 0; use_post_pass < 2; use_post_pass++) {
+			const uint32_t hdr_iterations = use_post_pass ? 2 : 1; // The HDR variant is only enumerated together with the post pass.
+			for (uint32_t use_hdr = 0; use_hdr < hdr_iterations; use_hdr++) {
+				pipeline_key.version = SceneShaderForwardMobile::SHADER_VERSION_COLOR_PASS;
+				pipeline_key.framebuffer_format_id = _get_color_framebuffer_format_for_pipeline(buffers_color_format, buffers_can_be_storage, RD::TextureSamples(p_global.texture_samples), RD::TextureSamples(p_global.target_samples), use_vrs, use_post_pass, use_hdr, 1);
+				_mesh_compile_pipeline_for_surface(p_surface.shader, p_surface.mesh_surface, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs);
+
+				if (p_global.use_lightmaps && p_surface.can_use_lightmap) {
+					pipeline_key.version = SceneShaderForwardMobile::SHADER_VERSION_LIGHTMAP_COLOR_PASS;
+					_mesh_compile_pipeline_for_surface(p_surface.shader, p_surface.mesh_surface, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs);
+				}
+
+				if (multiview_enabled) {
+					// View count is assumed to be 2 as the configuration is dependent on the viewport. It's likely a safe assumption for stereo rendering.
+					const uint32_t view_count = 2;
+					pipeline_key.version = SceneShaderForwardMobile::SHADER_VERSION_COLOR_PASS_MULTIVIEW;
+					pipeline_key.framebuffer_format_id = _get_color_framebuffer_format_for_pipeline(buffers_color_format, buffers_can_be_storage, RD::TextureSamples(p_global.texture_samples), RD::TextureSamples(p_global.target_samples), use_vrs, use_post_pass, use_hdr, view_count);
+					_mesh_compile_pipeline_for_surface(p_surface.shader, p_surface.mesh_surface, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs);
+
+					if (p_global.use_lightmaps && p_surface.can_use_lightmap) {
+						pipeline_key.version = SceneShaderForwardMobile::SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW;
+						_mesh_compile_pipeline_for_surface(p_surface.shader, p_surface.mesh_surface, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs);
+					}
+				}
+			}
+		}
+	}
+
+	if (p_global.use_reflection_probes) {
+		pipeline_key.version = SceneShaderForwardMobile::SHADER_VERSION_COLOR_PASS;
+		pipeline_key.framebuffer_format_id = _get_reflection_probe_color_framebuffer_format_for_pipeline();
+		_mesh_compile_pipeline_for_surface(p_surface.shader, p_surface.mesh_surface, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs);
+	}
+
+	if (!p_surface.uses_depth) { // Everything below compiles shadow pass versions only.
+		return;
+	}
+
+	if (p_global.use_shadow_cubemaps) {
+		pipeline_key.version = SceneShaderForwardMobile::SHADER_VERSION_SHADOW_PASS;
+		pipeline_key.framebuffer_format_id = _get_shadow_cubemap_framebuffer_format_for_pipeline();
+		_mesh_compile_pipeline_for_surface(p_surface.shader_shadow, p_surface.mesh_surface_shadow, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs);
+	}
+
+	const uint32_t use_16_bits_start = p_global.use_32_bit_shadows ? 0 : 1; // Start at 0 (32 bits) only when 32-bit shadows are required.
+	const uint32_t use_16_bits_iterations = p_global.use_16_bit_shadows ? 2 : 1; // Reach 1 (16 bits) only when 16-bit shadows are required.
+	for (uint32_t use_16_bits = use_16_bits_start; use_16_bits < use_16_bits_iterations; use_16_bits++) {
+		pipeline_key.version = SceneShaderForwardMobile::SHADER_VERSION_SHADOW_PASS;
+		pipeline_key.framebuffer_format_id = _get_shadow_atlas_framebuffer_format_for_pipeline(use_16_bits);
+		_mesh_compile_pipeline_for_surface(p_surface.shader_shadow, p_surface.mesh_surface_shadow, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs);
+
+		if (p_global.use_shadow_dual_paraboloid) {
+			pipeline_key.version = SceneShaderForwardMobile::SHADER_VERSION_SHADOW_PASS_DP;
+			_mesh_compile_pipeline_for_surface(p_surface.shader_shadow, p_surface.mesh_surface_shadow, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs);
+		}
+
+		if (multiview_enabled) {
+			pipeline_key.version = SceneShaderForwardMobile::SHADER_VERSION_SHADOW_PASS_MULTIVIEW;
+			_mesh_compile_pipeline_for_surface(p_surface.shader_shadow, p_surface.mesh_surface_shadow, p_surface.instanced, p_source, pipeline_key, r_pipeline_pairs);
+		}
+	}
+}
+
+void RenderForwardMobile::_mesh_generate_all_pipelines_for_surface_cache(GeometryInstanceSurfaceDataCache *p_surface_cache, const GlobalPipelineData &p_global) { // Translates a surface cache entry into SurfacePipelineData and requests its pipelines with RS::PIPELINE_SOURCE_SURFACE.
+	bool uses_alpha_pass = (p_surface_cache->flags & GeometryInstanceSurfaceDataCache::FLAG_PASS_ALPHA) != 0;
+	SurfacePipelineData surface;
+	surface.mesh_surface = p_surface_cache->surface;
+	surface.mesh_surface_shadow = p_surface_cache->surface_shadow;
+	surface.shader = p_surface_cache->shader;
+	surface.shader_shadow = p_surface_cache->shader_shadow;
+	surface.instanced = p_surface_cache->owner->mesh_instance.is_valid();
+	surface.uses_opaque = !uses_alpha_pass;
+	surface.uses_transparent = uses_alpha_pass;
+	surface.uses_depth = (p_surface_cache->flags & (GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH | GeometryInstanceSurfaceDataCache::FLAG_PASS_OPAQUE | GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW)) != 0;
+	surface.can_use_lightmap = p_surface_cache->owner->lightmap_instance.is_valid() || p_surface_cache->owner->lightmap_sh;
+	_mesh_compile_pipelines_for_surface(surface, p_global, RS::PIPELINE_SOURCE_SURFACE);
+}
+
 void RenderForwardMobile::_update_dirty_geometry_instances() {
 	while (geometry_instance_dirty_list.first()) {
 		_geometry_instance_update(geometry_instance_dirty_list.first()->self());
 	}
+
+	if (global_pipeline_data_required.key != global_pipeline_data_compiled.key) {
+		// Go through the entire list of surfaces and compile pipelines for everything again.
+		SelfList<GeometryInstanceSurfaceDataCache> *list = geometry_surface_compilation_all_list.first();
+		while (list != nullptr) {
+			GeometryInstanceSurfaceDataCache *surface_cache = list->self();
+			_mesh_generate_all_pipelines_for_surface_cache(surface_cache, global_pipeline_data_required);
+
+			if (surface_cache->compilation_dirty_element.in_list()) {
+				// Remove any elements from the dirty list as they don't need to be processed again.
+				geometry_surface_compilation_dirty_list.remove(&surface_cache->compilation_dirty_element);
+			}
+
+			list = list->next();
+		}
+
+		global_pipeline_data_compiled.key = global_pipeline_data_required.key;
+	} else {
+		// Compile pipelines only for the dirty list.
+ if (!geometry_surface_compilation_dirty_list.first()) { + return; + } + + while (geometry_surface_compilation_dirty_list.first() != nullptr) { + GeometryInstanceSurfaceDataCache *surface_cache = geometry_surface_compilation_dirty_list.first()->self(); + _mesh_generate_all_pipelines_for_surface_cache(surface_cache, global_pipeline_data_compiled); + surface_cache->compilation_dirty_element.remove_from_list(); + } + } } void RenderForwardMobile::_geometry_instance_dependency_changed(Dependency::DependencyChangedNotification p_notification, DependencyTracker *p_tracker) { @@ -2734,54 +3110,26 @@ uint32_t RenderForwardMobile::get_max_elements() const { RenderForwardMobile *RenderForwardMobile::singleton = nullptr; void RenderForwardMobile::_update_shader_quality_settings() { - Vector spec_constants; - - RD::PipelineSpecializationConstant sc; - sc.type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT; - - sc.constant_id = SPEC_CONSTANT_SOFT_SHADOW_SAMPLES; - sc.int_value = soft_shadow_samples_get(); - - spec_constants.push_back(sc); - - sc.constant_id = SPEC_CONSTANT_PENUMBRA_SHADOW_SAMPLES; - sc.int_value = penumbra_shadow_samples_get(); - - spec_constants.push_back(sc); - - sc.constant_id = SPEC_CONSTANT_DIRECTIONAL_SOFT_SHADOW_SAMPLES; - sc.int_value = directional_soft_shadow_samples_get(); - - spec_constants.push_back(sc); - - sc.constant_id = SPEC_CONSTANT_DIRECTIONAL_PENUMBRA_SHADOW_SAMPLES; - sc.int_value = directional_penumbra_shadow_samples_get(); - - spec_constants.push_back(sc); - - sc.type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL; - sc.constant_id = SPEC_CONSTANT_DECAL_USE_MIPMAPS; - sc.bool_value = decals_get_filter() == RS::DECAL_FILTER_NEAREST_MIPMAPS || + SceneShaderForwardMobile::ShaderSpecialization specialization = {}; + specialization.soft_shadow_samples = soft_shadow_samples_get(); + specialization.penumbra_shadow_samples = penumbra_shadow_samples_get(); + specialization.directional_soft_shadow_samples = 
directional_soft_shadow_samples_get(); + specialization.directional_penumbra_shadow_samples = directional_penumbra_shadow_samples_get(); + specialization.decal_use_mipmaps = + decals_get_filter() == RS::DECAL_FILTER_NEAREST_MIPMAPS || decals_get_filter() == RS::DECAL_FILTER_LINEAR_MIPMAPS || decals_get_filter() == RS::DECAL_FILTER_NEAREST_MIPMAPS_ANISOTROPIC || decals_get_filter() == RS::DECAL_FILTER_LINEAR_MIPMAPS_ANISOTROPIC; - spec_constants.push_back(sc); - - sc.constant_id = SPEC_CONSTANT_PROJECTOR_USE_MIPMAPS; - sc.bool_value = light_projectors_get_filter() == RS::LIGHT_PROJECTOR_FILTER_NEAREST_MIPMAPS || + specialization.projector_use_mipmaps = + light_projectors_get_filter() == RS::LIGHT_PROJECTOR_FILTER_NEAREST_MIPMAPS || light_projectors_get_filter() == RS::LIGHT_PROJECTOR_FILTER_LINEAR_MIPMAPS || light_projectors_get_filter() == RS::LIGHT_PROJECTOR_FILTER_NEAREST_MIPMAPS_ANISOTROPIC || light_projectors_get_filter() == RS::LIGHT_PROJECTOR_FILTER_LINEAR_MIPMAPS_ANISOTROPIC; - spec_constants.push_back(sc); - - sc.constant_id = SPEC_CONSTANT_USE_LIGHTMAP_BICUBIC_FILTER; - sc.bool_value = lightmap_filter_bicubic_get(); - - spec_constants.push_back(sc); - - scene_shader.set_default_specialization_constants(spec_constants); + specialization.use_lightmap_bicubic_filter = lightmap_filter_bicubic_get(); + specialization.luminance_multiplier = 2.0f; + scene_shader.set_default_specialization(specialization); base_uniforms_changed(); //also need this } diff --git a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h index 9e429d598adb..a2178083ce23 100644 --- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h +++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h @@ -59,32 +59,6 @@ class RenderForwardMobile : public RendererSceneRenderRD { MATERIAL_UNIFORM_SET = 3, }; - enum { - - SPEC_CONSTANT_USING_PROJECTOR = 0, - 
SPEC_CONSTANT_USING_SOFT_SHADOWS = 1, - SPEC_CONSTANT_USING_DIRECTIONAL_SOFT_SHADOWS = 2, - - SPEC_CONSTANT_SOFT_SHADOW_SAMPLES = 3, - SPEC_CONSTANT_PENUMBRA_SHADOW_SAMPLES = 4, - SPEC_CONSTANT_DIRECTIONAL_SOFT_SHADOW_SAMPLES = 5, - SPEC_CONSTANT_DIRECTIONAL_PENUMBRA_SHADOW_SAMPLES = 6, - - SPEC_CONSTANT_DECAL_USE_MIPMAPS = 7, - SPEC_CONSTANT_PROJECTOR_USE_MIPMAPS = 8, - - SPEC_CONSTANT_DISABLE_OMNI_LIGHTS = 9, - SPEC_CONSTANT_DISABLE_SPOT_LIGHTS = 10, - SPEC_CONSTANT_DISABLE_REFLECTION_PROBES = 11, - SPEC_CONSTANT_DISABLE_DIRECTIONAL_LIGHTS = 12, - - SPEC_CONSTANT_DISABLE_DECALS = 13, - SPEC_CONSTANT_DISABLE_FOG = 14, - SPEC_CONSTANT_USE_DEPTH_FOG = 16, - SPEC_CONSTANT_IS_MULTIMESH = 17, - SPEC_CONSTANT_USE_LIGHTMAP_BICUBIC_FILTER = 18, - }; - enum { MAX_LIGHTMAPS = 8, MAX_RDL_CULL = 8, // maximum number of reflection probes, decals or lights we can cull per geometry instance @@ -152,14 +126,14 @@ class RenderForwardMobile : public RendererSceneRenderRD { RID render_pass_uniform_set; bool force_wireframe = false; Vector2 uv_offset; - uint32_t spec_constant_base_flags = 0; + SceneShaderForwardMobile::ShaderSpecialization base_specialization; float lod_distance_multiplier = 0.0; float screen_mesh_lod_threshold = 0.0; RD::FramebufferFormatID framebuffer_format = 0; uint32_t element_offset = 0; uint32_t subpass = 0; - RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, RenderElementInfo *p_element_info, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, RID p_render_pass_uniform_set, uint32_t p_spec_constant_base_flags = 0, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), float p_lod_distance_multiplier = 0.0, float p_screen_mesh_lod_threshold = 0.0, uint32_t p_view_count = 1, uint32_t p_element_offset = 0) { + RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, RenderElementInfo *p_element_info, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, RID p_render_pass_uniform_set, 
SceneShaderForwardMobile::ShaderSpecialization p_base_specialization, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), float p_lod_distance_multiplier = 0.0, float p_screen_mesh_lod_threshold = 0.0, uint32_t p_view_count = 1, uint32_t p_element_offset = 0) { elements = p_elements; element_info = p_element_info; element_count = p_element_count; @@ -173,7 +147,7 @@ class RenderForwardMobile : public RendererSceneRenderRD { lod_distance_multiplier = p_lod_distance_multiplier; screen_mesh_lod_threshold = p_screen_mesh_lod_threshold; element_offset = p_element_offset; - spec_constant_base_flags = p_spec_constant_base_flags; + base_specialization = p_base_specialization; } }; @@ -222,10 +196,16 @@ class RenderForwardMobile : public RendererSceneRenderRD { struct SceneState { LocalVector uniform_buffers; + struct PushConstantUbershader { + SceneShaderForwardMobile::ShaderSpecialization specialization; + SceneShaderForwardMobile::UbershaderConstants constants; + }; + struct PushConstant { float uv_offset[2]; uint32_t base_index; uint32_t pad; + PushConstantUbershader ubershader; }; struct InstanceData { @@ -264,6 +244,7 @@ class RenderForwardMobile : public RendererSceneRenderRD { bool used_normal_texture = false; bool used_depth_texture = false; bool used_sss = false; + bool used_lightmap = false; struct ShadowPass { uint32_t element_from; @@ -455,6 +436,12 @@ class RenderForwardMobile : public RendererSceneRenderRD { GeometryInstanceSurfaceDataCache *next = nullptr; GeometryInstanceForwardMobile *owner = nullptr; + + SelfList compilation_dirty_element; + SelfList compilation_all_element; + + GeometryInstanceSurfaceDataCache() : + compilation_dirty_element(this), compilation_all_element(this) {} }; class GeometryInstanceForwardMobile : public RenderGeometryInstanceBase { @@ -560,15 +547,57 @@ class RenderForwardMobile : public RendererSceneRenderRD { static void _geometry_instance_dependency_deleted(const RID &p_dependency, DependencyTracker 
*p_tracker); SelfList::List geometry_instance_dirty_list; + SelfList::List geometry_surface_compilation_dirty_list; + SelfList::List geometry_surface_compilation_all_list; PagedAllocator geometry_instance_alloc; PagedAllocator geometry_instance_surface_alloc; PagedAllocator geometry_instance_lightmap_sh; + struct SurfacePipelineData { + void *mesh_surface = nullptr; + void *mesh_surface_shadow = nullptr; + SceneShaderForwardMobile::ShaderData *shader = nullptr; + SceneShaderForwardMobile::ShaderData *shader_shadow = nullptr; + bool instanced = false; + bool uses_opaque = false; + bool uses_transparent = false; + bool uses_depth = false; + bool can_use_lightmap = false; + }; + + struct GlobalPipelineData { + union { + struct { + uint32_t texture_samples : 3; + uint32_t target_samples : 3; + uint32_t use_reflection_probes : 1; + uint32_t use_lightmaps : 1; + uint32_t use_multiview : 1; + uint32_t use_16_bit_shadows : 1; + uint32_t use_32_bit_shadows : 1; + uint32_t use_shadow_cubemaps : 1; + uint32_t use_shadow_dual_paraboloid : 1; + }; + + uint32_t key; + }; + }; + + GlobalPipelineData global_pipeline_data_compiled = {}; + GlobalPipelineData global_pipeline_data_required = {}; + + typedef Pair ShaderPipelinePair; + + void _update_global_pipeline_data_requirements_from_project(); + void _update_global_pipeline_data_requirements_from_light_storage(); void _geometry_instance_add_surface_with_material(GeometryInstanceForwardMobile *ginstance, uint32_t p_surface, SceneShaderForwardMobile::MaterialData *p_material, uint32_t p_material_id, uint32_t p_shader_id, RID p_mesh); void _geometry_instance_add_surface_with_material_chain(GeometryInstanceForwardMobile *ginstance, uint32_t p_surface, SceneShaderForwardMobile::MaterialData *p_material, RID p_mat_src, RID p_mesh); void _geometry_instance_add_surface(GeometryInstanceForwardMobile *ginstance, uint32_t p_surface, RID p_material, RID p_mesh); void _geometry_instance_update(RenderGeometryInstance *p_geometry_instance); + 
void _mesh_compile_pipeline_for_surface(SceneShaderForwardMobile::ShaderData *p_shader, void *p_mesh_surface, bool p_instanced_surface, RS::PipelineSource p_source, SceneShaderForwardMobile::ShaderData::PipelineKey &r_pipeline_key, Vector *r_pipeline_pairs = nullptr); + void _mesh_compile_pipelines_for_surface(const SurfacePipelineData &p_surface, const GlobalPipelineData &p_global, RS::PipelineSource p_source, Vector *r_pipeline_pairs = nullptr); + void _mesh_generate_all_pipelines_for_surface_cache(GeometryInstanceSurfaceDataCache *p_surface_cache, const GlobalPipelineData &p_global); void _update_dirty_geometry_instances(); virtual RenderGeometryInstance *geometry_instance_create(RID p_base) override; @@ -576,8 +605,15 @@ class RenderForwardMobile : public RendererSceneRenderRD { virtual uint32_t geometry_instance_get_pair_mask() override; + /* PIPELINES */ + + virtual void mesh_generate_pipelines(RID p_mesh, bool p_background_compilation) override; + virtual uint32_t get_pipeline_compilations(RS::PipelineSource p_source) override; + virtual bool free(RID p_rid) override; + virtual void update() override; + virtual void base_uniforms_changed() override; virtual bool is_dynamic_gi_supported() const override; diff --git a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp index 08982096c566..c6009dcc4f2a 100644 --- a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp @@ -43,9 +43,9 @@ void SceneShaderForwardMobile::ShaderData::set_code(const String &p_code) { //compile code = p_code; - valid = false; ubo_size = 0; uniforms.clear(); + _clear_vertex_input_mask_cache(); if (code.is_empty()) { return; //just invalid, but no error @@ -53,10 +53,10 @@ void SceneShaderForwardMobile::ShaderData::set_code(const String &p_code) { ShaderCompiler::GeneratedCode 
gen_code; - int blend_mode = BLEND_MODE_MIX; - int depth_testi = DEPTH_TEST_ENABLED; - int alpha_antialiasing_mode = ALPHA_ANTIALIASING_OFF; - int cull = CULL_BACK; + blend_mode = BLEND_MODE_MIX; + depth_testi = DEPTH_TEST_ENABLED; + alpha_antialiasing_mode = ALPHA_ANTIALIASING_OFF; + cull_mode = CULL_BACK; uses_point_size = false; uses_alpha = false; @@ -68,8 +68,8 @@ void SceneShaderForwardMobile::ShaderData::set_code(const String &p_code) { uses_roughness = false; uses_normal = false; uses_tangent = false; - bool uses_normal_map = false; - bool wireframe = false; + uses_normal_map = false; + wireframe = false; unshaded = false; uses_vertex = false; @@ -91,7 +91,7 @@ void SceneShaderForwardMobile::ShaderData::set_code(const String &p_code) { actions.render_mode_values["blend_mix"] = Pair(&blend_mode, BLEND_MODE_MIX); actions.render_mode_values["blend_sub"] = Pair(&blend_mode, BLEND_MODE_SUB); actions.render_mode_values["blend_mul"] = Pair(&blend_mode, BLEND_MODE_MUL); - actions.render_mode_values["blend_premul_alpha"] = Pair(&blend_mode, BLEND_MODE_PREMULT_ALPHA); + actions.render_mode_values["blend_premul_alpha"] = Pair(&blend_mode, BLEND_MODE_PREMULTIPLIED_ALPHA); actions.render_mode_values["alpha_to_coverage"] = Pair(&alpha_antialiasing_mode, ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE); actions.render_mode_values["alpha_to_coverage_and_one"] = Pair(&alpha_antialiasing_mode, ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE_AND_TO_ONE); @@ -102,9 +102,9 @@ void SceneShaderForwardMobile::ShaderData::set_code(const String &p_code) { actions.render_mode_values["depth_test_disabled"] = Pair(&depth_testi, DEPTH_TEST_DISABLED); - actions.render_mode_values["cull_disabled"] = Pair(&cull, CULL_DISABLED); - actions.render_mode_values["cull_front"] = Pair(&cull, CULL_FRONT); - actions.render_mode_values["cull_back"] = Pair(&cull, CULL_BACK); + actions.render_mode_values["cull_disabled"] = Pair(&cull_mode, CULL_DISABLED); + actions.render_mode_values["cull_front"] = Pair(&cull_mode, 
CULL_FRONT); + actions.render_mode_values["cull_back"] = Pair(&cull_mode, CULL_BACK); actions.render_mode_flags["unshaded"] = &unshaded; actions.render_mode_flags["wireframe"] = &wireframe; @@ -141,13 +141,12 @@ void SceneShaderForwardMobile::ShaderData::set_code(const String &p_code) { actions.uniforms = &uniforms; - SceneShaderForwardMobile *shader_singleton = (SceneShaderForwardMobile *)SceneShaderForwardMobile::singleton; - - Error err = shader_singleton->compiler.compile(RS::SHADER_SPATIAL, code, &actions, path, gen_code); + MutexLock lock(SceneShaderForwardMobile::singleton_mutex); + Error err = SceneShaderForwardMobile::singleton->compiler.compile(RS::SHADER_SPATIAL, code, &actions, path, gen_code); ERR_FAIL_COND_MSG(err != OK, "Shader compilation failed."); if (version.is_null()) { - version = shader_singleton->shader.version_create(); + version = SceneShaderForwardMobile::singleton->shader.version_create(); } depth_draw = DepthDraw(depth_drawi); @@ -189,86 +188,60 @@ void SceneShaderForwardMobile::ShaderData::set_code(const String &p_code) { print_line("\n**fragment_globals:\n" + gen_code.stage_globals[ShaderCompiler::STAGE_FRAGMENT]); #endif - shader_singleton->shader.version_set_code(version, gen_code.code, gen_code.uniforms, gen_code.stage_globals[ShaderCompiler::STAGE_VERTEX], gen_code.stage_globals[ShaderCompiler::STAGE_FRAGMENT], gen_code.defines); - ERR_FAIL_COND(!shader_singleton->shader.version_is_valid(version)); + SceneShaderForwardMobile::singleton->shader.version_set_code(version, gen_code.code, gen_code.uniforms, gen_code.stage_globals[ShaderCompiler::STAGE_VERTEX], gen_code.stage_globals[ShaderCompiler::STAGE_FRAGMENT], gen_code.defines); ubo_size = gen_code.uniform_total_size; ubo_offsets = gen_code.uniform_offsets; texture_uniforms = gen_code.texture_uniforms; - //blend modes + pipeline_hash_map.clear_pipelines(); - // if any form of Alpha Antialiasing is enabled, set the blend mode to alpha to coverage + // If any form of Alpha 
Antialiasing is enabled, set the blend mode to alpha to coverage. if (alpha_antialiasing_mode != ALPHA_ANTIALIASING_OFF) { blend_mode = BLEND_MODE_ALPHA_TO_COVERAGE; } - RD::PipelineColorBlendState::Attachment blend_attachment; - - switch (blend_mode) { - case BLEND_MODE_MIX: { - blend_attachment.enable_blend = true; - blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; - blend_attachment.color_blend_op = RD::BLEND_OP_ADD; - blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - - } break; - case BLEND_MODE_ADD: { - blend_attachment.enable_blend = true; - blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; - blend_attachment.color_blend_op = RD::BLEND_OP_ADD; - blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE; - blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - uses_blend_alpha = true; //force alpha used because of blend - - } break; - case BLEND_MODE_SUB: { - blend_attachment.enable_blend = true; - blend_attachment.alpha_blend_op = RD::BLEND_OP_REVERSE_SUBTRACT; - blend_attachment.color_blend_op = RD::BLEND_OP_REVERSE_SUBTRACT; - blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE; - blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - uses_blend_alpha = true; //force alpha used because of blend - - } break; - case BLEND_MODE_MUL: { - blend_attachment.enable_blend = true; - blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; - blend_attachment.color_blend_op = RD::BLEND_OP_ADD; - 
blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_DST_COLOR; - blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ZERO; - blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_DST_ALPHA; - blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ZERO; - uses_blend_alpha = true; //force alpha used because of blend - } break; - case BLEND_MODE_ALPHA_TO_COVERAGE: { - blend_attachment.enable_blend = true; - blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; - blend_attachment.color_blend_op = RD::BLEND_OP_ADD; - blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ZERO; - } break; - case BLEND_MODE_PREMULT_ALPHA: { - blend_attachment.enable_blend = true; - blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; - blend_attachment.color_blend_op = RD::BLEND_OP_ADD; - blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_ONE; - blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - blend_attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - uses_blend_alpha = true; // Force alpha used because of blend. 
- } break; + uses_blend_alpha = blend_mode_uses_blend_alpha(BlendMode(blend_mode)); +} + +bool SceneShaderForwardMobile::ShaderData::is_animated() const { + return (uses_fragment_time && uses_discard) || (uses_vertex_time && uses_vertex); +} + +bool SceneShaderForwardMobile::ShaderData::casts_shadows() const { + bool has_read_screen_alpha = uses_screen_texture || uses_depth_texture || uses_normal_texture; + bool has_base_alpha = (uses_alpha && (!uses_alpha_clip || uses_alpha_antialiasing)) || has_read_screen_alpha; + bool has_alpha = has_base_alpha || uses_blend_alpha; + + return !has_alpha || (uses_depth_prepass_alpha && !(depth_draw == DEPTH_DRAW_DISABLED || depth_test == DEPTH_TEST_DISABLED)); +} + +RS::ShaderNativeSourceCode SceneShaderForwardMobile::ShaderData::get_native_source_code() const { + if (version.is_valid()) { + MutexLock lock(SceneShaderForwardMobile::singleton_mutex); + return SceneShaderForwardMobile::singleton->shader.version_get_native_source_code(version); + } else { + return RS::ShaderNativeSourceCode(); } +} + +void SceneShaderForwardMobile::ShaderData::_create_pipeline(PipelineKey p_pipeline_key) { +#if PRINT_PIPELINE_COMPILATION_KEYS + print_line( + "HASH:", p_pipeline_key.hash(), + "VERSION:", version, + "VERTEX:", p_pipeline_key.vertex_format_id, + "FRAMEBUFFER:", p_pipeline_key.framebuffer_format_id, + "CULL:", p_pipeline_key.cull_mode, + "PRIMITIVE:", p_pipeline_key.primitive_type, + "VERSION:", p_pipeline_key.version, + "SPEC PACKED #0:", p_pipeline_key.shader_specialization.packed_0, + "SPEC PACKED #1:", p_pipeline_key.shader_specialization.packed_1, + "RENDER PASS:", p_pipeline_key.render_pass, + "WIREFRAME:", p_pipeline_key.wireframe); +#endif + RD::PipelineColorBlendState::Attachment blend_attachment = blend_mode_to_blend_attachment(BlendMode(blend_mode)); RD::PipelineColorBlendState blend_state_blend; blend_state_blend.attachments.push_back(blend_attachment); RD::PipelineColorBlendState blend_state_opaque = 
RD::PipelineColorBlendState::create_disabled(1); @@ -286,113 +259,151 @@ void SceneShaderForwardMobile::ShaderData::set_code(const String &p_code) { depth_stencil_state.enable_depth_write = depth_draw != DEPTH_DRAW_DISABLED ? true : false; } - for (int i = 0; i < CULL_VARIANT_MAX; i++) { - RD::PolygonCullMode cull_mode_rd_table[CULL_VARIANT_MAX][3] = { - { RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_FRONT, RD::POLYGON_CULL_BACK }, - { RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_BACK, RD::POLYGON_CULL_FRONT }, - { RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_DISABLED } - }; - - RD::PolygonCullMode cull_mode_rd = cull_mode_rd_table[i][cull]; - - for (int j = 0; j < RS::PRIMITIVE_MAX; j++) { - RD::RenderPrimitive primitive_rd_table[RS::PRIMITIVE_MAX] = { - RD::RENDER_PRIMITIVE_POINTS, - RD::RENDER_PRIMITIVE_LINES, - RD::RENDER_PRIMITIVE_LINESTRIPS, - RD::RENDER_PRIMITIVE_TRIANGLES, - RD::RENDER_PRIMITIVE_TRIANGLE_STRIPS, - }; - - RD::RenderPrimitive primitive_rd = uses_point_size ? 
RD::RENDER_PRIMITIVE_POINTS : primitive_rd_table[j]; - - for (int k = 0; k < SHADER_VERSION_MAX; k++) { - if (!static_cast(singleton)->shader.is_variant_enabled(k)) { - continue; - } - RD::PipelineRasterizationState raster_state; - raster_state.cull_mode = cull_mode_rd; - raster_state.wireframe = wireframe; - - RD::PipelineColorBlendState blend_state; - RD::PipelineDepthStencilState depth_stencil = depth_stencil_state; - RD::PipelineMultisampleState multisample_state; - - if (uses_alpha || uses_blend_alpha) { - // only allow these flags to go through if we have some form of msaa - if (alpha_antialiasing_mode == ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE) { - multisample_state.enable_alpha_to_coverage = true; - } else if (alpha_antialiasing_mode == ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE_AND_TO_ONE) { - multisample_state.enable_alpha_to_coverage = true; - multisample_state.enable_alpha_to_one = true; - } - - if (k == SHADER_VERSION_COLOR_PASS || k == SHADER_VERSION_COLOR_PASS_MULTIVIEW || k == SHADER_VERSION_LIGHTMAP_COLOR_PASS || k == SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW) { - blend_state = blend_state_blend; - if (depth_draw == DEPTH_DRAW_OPAQUE && !uses_alpha_clip) { - depth_stencil.enable_depth_write = false; //alpha does not draw depth - } - } else if (k == SHADER_VERSION_SHADOW_PASS || k == SHADER_VERSION_SHADOW_PASS_MULTIVIEW || k == SHADER_VERSION_SHADOW_PASS_DP) { - //none, blend state contains nothing - } else if (k == SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL) { - blend_state = RD::PipelineColorBlendState::create_disabled(5); //writes to normal and roughness in opaque way - } else { - pipelines[i][j][k].clear(); - continue; // do not use this version (will error if using it is attempted) - } - } else { - if (k == SHADER_VERSION_COLOR_PASS || k == SHADER_VERSION_COLOR_PASS_MULTIVIEW || k == SHADER_VERSION_LIGHTMAP_COLOR_PASS || k == SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW) { - blend_state = blend_state_opaque; - } else if (k == SHADER_VERSION_SHADOW_PASS 
|| k == SHADER_VERSION_SHADOW_PASS_MULTIVIEW || k == SHADER_VERSION_SHADOW_PASS_DP) { - //none, leave empty - } else if (k == SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL) { - blend_state = RD::PipelineColorBlendState::create_disabled(5); //writes to normal and roughness in opaque way - } else { - // ??? - } - } - - RID shader_variant = shader_singleton->shader.version_get_shader(version, k); - pipelines[i][j][k].setup(shader_variant, primitive_rd, raster_state, multisample_state, depth_stencil, blend_state, 0, singleton->default_specialization_constants); + RD::RenderPrimitive primitive_rd_table[RS::PRIMITIVE_MAX] = { + RD::RENDER_PRIMITIVE_POINTS, + RD::RENDER_PRIMITIVE_LINES, + RD::RENDER_PRIMITIVE_LINESTRIPS, + RD::RENDER_PRIMITIVE_TRIANGLES, + RD::RENDER_PRIMITIVE_TRIANGLE_STRIPS, + }; + + RD::RenderPrimitive primitive_rd = uses_point_size ? RD::RENDER_PRIMITIVE_POINTS : primitive_rd_table[p_pipeline_key.primitive_type]; + + RD::PipelineRasterizationState raster_state; + raster_state.cull_mode = p_pipeline_key.cull_mode; + raster_state.wireframe = wireframe || p_pipeline_key.wireframe; + + RD::PipelineMultisampleState multisample_state; + multisample_state.sample_count = RD::get_singleton()->framebuffer_format_get_texture_samples(p_pipeline_key.framebuffer_format_id, 0); + + RD::PipelineColorBlendState blend_state; + if (uses_alpha || uses_blend_alpha) { + // These flags should only go through if we have some form of MSAA. 
+ if (alpha_antialiasing_mode == ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE) { + multisample_state.enable_alpha_to_coverage = true; + } else if (alpha_antialiasing_mode == ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE_AND_TO_ONE) { + multisample_state.enable_alpha_to_coverage = true; + multisample_state.enable_alpha_to_one = true; + } + + if (p_pipeline_key.version == SHADER_VERSION_COLOR_PASS || p_pipeline_key.version == SHADER_VERSION_COLOR_PASS_MULTIVIEW || p_pipeline_key.version == SHADER_VERSION_LIGHTMAP_COLOR_PASS || p_pipeline_key.version == SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW) { + blend_state = blend_state_blend; + if (depth_draw == DEPTH_DRAW_OPAQUE && !uses_alpha_clip) { + // Alpha does not write to depth. + depth_stencil_state.enable_depth_write = false; } + } else if (p_pipeline_key.version == SHADER_VERSION_SHADOW_PASS || p_pipeline_key.version == SHADER_VERSION_SHADOW_PASS_MULTIVIEW || p_pipeline_key.version == SHADER_VERSION_SHADOW_PASS_DP) { + // Contains nothing. + } else if (p_pipeline_key.version == SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL) { + // Writes to normal and roughness in opaque way. + blend_state = RD::PipelineColorBlendState::create_disabled(5); + } else { + // Do not use this version (error case). + } + } else { + if (p_pipeline_key.version == SHADER_VERSION_COLOR_PASS || p_pipeline_key.version == SHADER_VERSION_COLOR_PASS_MULTIVIEW || p_pipeline_key.version == SHADER_VERSION_LIGHTMAP_COLOR_PASS || p_pipeline_key.version == SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW) { + blend_state = blend_state_opaque; + } else if (p_pipeline_key.version == SHADER_VERSION_SHADOW_PASS || p_pipeline_key.version == SHADER_VERSION_SHADOW_PASS_MULTIVIEW || p_pipeline_key.version == SHADER_VERSION_SHADOW_PASS_DP) { + // Contains nothing. + } else if (p_pipeline_key.version == SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL) { + // Writes to normal and roughness in opaque way. 
+ blend_state = RD::PipelineColorBlendState::create_disabled(5); + } else { + // Unknown pipeline version. } } - valid = true; + // Convert the specialization from the key to pipeline specialization constants. + Vector specialization_constants; + RD::PipelineSpecializationConstant sc; + sc.constant_id = 0; + sc.int_value = p_pipeline_key.shader_specialization.packed_0; + sc.type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT; + specialization_constants.push_back(sc); + + sc.constant_id = 1; + sc.float_value = p_pipeline_key.shader_specialization.packed_1; + sc.type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT; + specialization_constants.push_back(sc); + + RID shader_rid = get_shader_variant(p_pipeline_key.version, p_pipeline_key.ubershader); + ERR_FAIL_COND(shader_rid.is_null()); + + RID pipeline = RD::get_singleton()->render_pipeline_create(shader_rid, p_pipeline_key.framebuffer_format_id, p_pipeline_key.vertex_format_id, primitive_rd, raster_state, multisample_state, depth_stencil_state, blend_state, 0, p_pipeline_key.render_pass, specialization_constants); + ERR_FAIL_COND(pipeline.is_null()); + + pipeline_hash_map.add_compiled_pipeline(p_pipeline_key.hash(), pipeline); } -bool SceneShaderForwardMobile::ShaderData::is_animated() const { - return (uses_fragment_time && uses_discard) || (uses_vertex_time && uses_vertex); +RD::PolygonCullMode SceneShaderForwardMobile::ShaderData::get_cull_mode_from_cull_variant(CullVariant p_cull_variant) { + const RD::PolygonCullMode cull_mode_rd_table[CULL_VARIANT_MAX][3] = { + { RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_FRONT, RD::POLYGON_CULL_BACK }, + { RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_BACK, RD::POLYGON_CULL_FRONT }, + { RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_DISABLED, RD::POLYGON_CULL_DISABLED } + }; + + return cull_mode_rd_table[p_cull_variant][cull_mode]; } -bool SceneShaderForwardMobile::ShaderData::casts_shadows() const { - bool has_read_screen_alpha = uses_screen_texture || uses_depth_texture || 
uses_normal_texture; - bool has_base_alpha = (uses_alpha && (!uses_alpha_clip || uses_alpha_antialiasing)) || has_read_screen_alpha; - bool has_alpha = has_base_alpha || uses_blend_alpha; +void SceneShaderForwardMobile::ShaderData::_clear_vertex_input_mask_cache() { + for (uint32_t i = 0; i < VERTEX_INPUT_MASKS_SIZE; i++) { + vertex_input_masks[i].store(0); + } +} - return !has_alpha || (uses_depth_prepass_alpha && !(depth_draw == DEPTH_DRAW_DISABLED || depth_test == DEPTH_TEST_DISABLED)); +RID SceneShaderForwardMobile::ShaderData::get_shader_variant(ShaderVersion p_shader_version, bool p_ubershader) const { + if (version.is_valid()) { + MutexLock lock(SceneShaderForwardMobile::singleton_mutex); + ERR_FAIL_NULL_V(SceneShaderForwardMobile::singleton, RID()); + return SceneShaderForwardMobile::singleton->shader.version_get_shader(version, p_shader_version + (p_ubershader ? SHADER_VERSION_MAX : 0)); + } else { + return RID(); + } } -RS::ShaderNativeSourceCode SceneShaderForwardMobile::ShaderData::get_native_source_code() const { - SceneShaderForwardMobile *shader_singleton = (SceneShaderForwardMobile *)SceneShaderForwardMobile::singleton; +uint64_t SceneShaderForwardMobile::ShaderData::get_vertex_input_mask(ShaderVersion p_shader_version, bool p_ubershader) { + // Vertex input masks require knowledge of the shader. Since querying the shader can be expensive due to high contention and the necessary mutex, we cache the result instead. + uint32_t input_mask_index = p_shader_version + (p_ubershader ? 
SHADER_VERSION_MAX : 0); + uint64_t input_mask = vertex_input_masks[input_mask_index].load(std::memory_order_relaxed); + if (input_mask == 0) { + RID shader_rid = get_shader_variant(p_shader_version, p_ubershader); + ERR_FAIL_COND_V(shader_rid.is_null(), 0); + + input_mask = RD::get_singleton()->shader_get_vertex_input_attribute_mask(shader_rid); + vertex_input_masks[input_mask_index].store(input_mask, std::memory_order_relaxed); + } - return shader_singleton->shader.version_get_native_source_code(version); + return input_mask; +} + +bool SceneShaderForwardMobile::ShaderData::is_valid() const { + if (version.is_valid()) { + MutexLock lock(SceneShaderForwardMobile::singleton_mutex); + ERR_FAIL_NULL_V(SceneShaderForwardMobile::singleton, false); + return SceneShaderForwardMobile::singleton->shader.version_is_valid(version); + } else { + return false; + } } SceneShaderForwardMobile::ShaderData::ShaderData() : shader_list_element(this) { + pipeline_hash_map.set_creation_object_and_function(this, &ShaderData::_create_pipeline); + pipeline_hash_map.set_compilations(SceneShaderForwardMobile::singleton->pipeline_compilations, &SceneShaderForwardMobile::singleton_mutex); } SceneShaderForwardMobile::ShaderData::~ShaderData() { - SceneShaderForwardMobile *shader_singleton = (SceneShaderForwardMobile *)SceneShaderForwardMobile::singleton; - ERR_FAIL_NULL(shader_singleton); - //pipeline variants will clear themselves if shader is gone + pipeline_hash_map.clear_pipelines(); + if (version.is_valid()) { - shader_singleton->shader.version_free(version); + MutexLock lock(SceneShaderForwardMobile::singleton_mutex); + ERR_FAIL_NULL(SceneShaderForwardMobile::singleton); + SceneShaderForwardMobile::singleton->shader.version_free(version); } } RendererRD::MaterialStorage::ShaderData *SceneShaderForwardMobile::_create_shader_func() { + MutexLock lock(SceneShaderForwardMobile::singleton_mutex); ShaderData *shader_data = memnew(ShaderData); 
singleton->shader_list.add(&shader_data->shader_list_element); return shader_data; @@ -407,9 +418,12 @@ void SceneShaderForwardMobile::MaterialData::set_next_pass(RID p_pass) { } bool SceneShaderForwardMobile::MaterialData::update_parameters(const HashMap &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { - SceneShaderForwardMobile *shader_singleton = (SceneShaderForwardMobile *)SceneShaderForwardMobile::singleton; - - return update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set, shader_singleton->shader.version_get_shader(shader_data->version, 0), RenderForwardMobile::MATERIAL_UNIFORM_SET, true, true); + if (shader_data->version.is_valid()) { + MutexLock lock(SceneShaderForwardMobile::singleton_mutex); + return update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set, SceneShaderForwardMobile::singleton->shader.version_get_shader(shader_data->version, 0), RenderForwardMobile::MATERIAL_UNIFORM_SET, true, true); + } else { + return false; + } } SceneShaderForwardMobile::MaterialData::~MaterialData() { @@ -426,6 +440,7 @@ RendererRD::MaterialStorage::MaterialData *SceneShaderForwardMobile::_create_mat /* Scene Shader */ SceneShaderForwardMobile *SceneShaderForwardMobile::singleton = nullptr; +Mutex SceneShaderForwardMobile::singleton_mutex; SceneShaderForwardMobile::SceneShaderForwardMobile() { // there should be only one of these, contained within our RenderForwardMobile singleton. 
@@ -439,23 +454,29 @@ void SceneShaderForwardMobile::init(const String p_defines) { { Vector shader_versions; - shader_versions.push_back(""); // SHADER_VERSION_COLOR_PASS - shader_versions.push_back("\n#define USE_LIGHTMAP\n"); // SHADER_VERSION_LIGHTMAP_COLOR_PASS - shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n"); // SHADER_VERSION_SHADOW_PASS, should probably change this to MODE_RENDER_SHADOW because we don't have a depth pass here... - shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_DUAL_PARABOLOID\n"); // SHADER_VERSION_SHADOW_PASS_DP - shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_MATERIAL\n"); // SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL - - // multiview versions of our shaders - shader_versions.push_back("\n#define USE_MULTIVIEW\n"); // SHADER_VERSION_COLOR_PASS_MULTIVIEW - shader_versions.push_back("\n#define USE_MULTIVIEW\n#define USE_LIGHTMAP\n"); // SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW - shader_versions.push_back("\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n"); // SHADER_VERSION_SHADOW_PASS_MULTIVIEW + for (uint32_t ubershader = 0; ubershader < 2; ubershader++) { + const String base_define = ubershader ? "\n#define UBERSHADER\n" : ""; + shader_versions.push_back(base_define + ""); // SHADER_VERSION_COLOR_PASS + shader_versions.push_back(base_define + "\n#define USE_LIGHTMAP\n"); // SHADER_VERSION_LIGHTMAP_COLOR_PASS + shader_versions.push_back(base_define + "\n#define MODE_RENDER_DEPTH\n"); // SHADER_VERSION_SHADOW_PASS, should probably change this to MODE_RENDER_SHADOW because we don't have a depth pass here... + shader_versions.push_back(base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_DUAL_PARABOLOID\n"); // SHADER_VERSION_SHADOW_PASS_DP + shader_versions.push_back(base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_MATERIAL\n"); // SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL + + // Multiview versions of our shaders. 
+ shader_versions.push_back(base_define + "\n#define USE_MULTIVIEW\n"); // SHADER_VERSION_COLOR_PASS_MULTIVIEW + shader_versions.push_back(base_define + "\n#define USE_MULTIVIEW\n#define USE_LIGHTMAP\n"); // SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW + shader_versions.push_back(base_define + "\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n"); // SHADER_VERSION_SHADOW_PASS_MULTIVIEW + } shader.initialize(shader_versions, p_defines); if (!RendererCompositorRD::get_singleton()->is_xr_enabled()) { - shader.set_variant_enabled(SHADER_VERSION_COLOR_PASS_MULTIVIEW, false); - shader.set_variant_enabled(SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW, false); - shader.set_variant_enabled(SHADER_VERSION_SHADOW_PASS_MULTIVIEW, false); + for (uint32_t ubershader = 0; ubershader < 2; ubershader++) { + uint32_t base_variant = ubershader ? SHADER_VERSION_MAX : 0; + shader.set_variant_enabled(base_variant + SHADER_VERSION_COLOR_PASS_MULTIVIEW, false); + shader.set_variant_enabled(base_variant + SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW, false); + shader.set_variant_enabled(base_variant + SHADER_VERSION_SHADOW_PASS_MULTIVIEW, false); + } } } @@ -752,19 +773,23 @@ void fragment() { } } -void SceneShaderForwardMobile::set_default_specialization_constants(const Vector &p_constants) { - default_specialization_constants = p_constants; +void SceneShaderForwardMobile::set_default_specialization(const ShaderSpecialization &p_specialization) { + default_specialization = p_specialization; + for (SelfList *E = shader_list.first(); E; E = E->next()) { - for (int i = 0; i < ShaderData::CULL_VARIANT_MAX; i++) { - for (int j = 0; j < RS::PRIMITIVE_MAX; j++) { - for (int k = 0; k < SHADER_VERSION_MAX; k++) { - E->self()->pipelines[i][j][k].update_specialization_constants(default_specialization_constants); - } - } - } + E->self()->pipeline_hash_map.clear_pipelines(); } } +uint32_t SceneShaderForwardMobile::get_pipeline_compilations(RS::PipelineSource p_source) { + MutexLock 
lock(SceneShaderForwardMobile::singleton_mutex); + return pipeline_compilations[p_source]; +} + +bool SceneShaderForwardMobile::is_multiview_enabled() const { + return shader.is_variant_enabled(SHADER_VERSION_COLOR_PASS_MULTIVIEW); +} + SceneShaderForwardMobile::~SceneShaderForwardMobile() { RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); diff --git a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h index 833b06c1e31b..c1095d29dc51 100644 --- a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h +++ b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h @@ -31,6 +31,7 @@ #ifndef SCENE_SHADER_FORWARD_MOBILE_H #define SCENE_SHADER_FORWARD_MOBILE_H +#include "servers/rendering/renderer_rd/pipeline_hash_map_rd.h" #include "servers/rendering/renderer_rd/renderer_scene_render_rd.h" #include "servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl.gen.h" @@ -39,6 +40,7 @@ namespace RendererSceneRenderImplementation { class SceneShaderForwardMobile { private: static SceneShaderForwardMobile *singleton; + static Mutex singleton_mutex; public: enum ShaderVersion { @@ -55,16 +57,52 @@ class SceneShaderForwardMobile { SHADER_VERSION_MAX }; - struct ShaderData : public RendererRD::MaterialStorage::ShaderData { - enum BlendMode { //used internally - BLEND_MODE_MIX, - BLEND_MODE_ADD, - BLEND_MODE_SUB, - BLEND_MODE_MUL, - BLEND_MODE_PREMULT_ALPHA, - BLEND_MODE_ALPHA_TO_COVERAGE + struct ShaderSpecialization { + union { + struct { + uint32_t use_light_projector : 1; + uint32_t use_light_soft_shadows : 1; + uint32_t use_directional_soft_shadows : 1; + uint32_t decal_use_mipmaps : 1; + uint32_t projector_use_mipmaps : 1; + uint32_t disable_omni_lights : 1; + uint32_t disable_spot_lights : 1; + uint32_t disable_reflection_probes : 1; + uint32_t disable_directional_lights 
: 1; + uint32_t disable_decals : 1; + uint32_t disable_fog : 1; + uint32_t use_depth_fog : 1; + uint32_t is_multimesh : 1; + uint32_t use_lightmap_bicubic_filter : 1; + uint32_t pad : 2; + uint32_t soft_shadow_samples : 4; + uint32_t penumbra_shadow_samples : 4; + uint32_t directional_soft_shadow_samples : 4; + uint32_t directional_penumbra_shadow_samples : 4; + }; + + uint32_t packed_0; + }; + + union { + float luminance_multiplier; + float packed_1; }; + uint32_t packed_2; + }; + + struct UbershaderConstants { + union { + struct { + uint32_t cull_mode : 2; + }; + + uint32_t packed_0; + }; + }; + + struct ShaderData : public RendererRD::MaterialStorage::ShaderData { enum DepthDraw { DEPTH_DRAW_DISABLED, DEPTH_DRAW_OPAQUE, @@ -96,10 +134,40 @@ class SceneShaderForwardMobile { ALPHA_ANTIALIASING_ALPHA_TO_COVERAGE_AND_TO_ONE }; - bool valid = false; + struct PipelineKey { + RD::VertexFormatID vertex_format_id; + RD::FramebufferFormatID framebuffer_format_id; + RD::PolygonCullMode cull_mode = RD::POLYGON_CULL_MAX; + RS::PrimitiveType primitive_type = RS::PRIMITIVE_MAX; + ShaderSpecialization shader_specialization = {}; + ShaderVersion version = SHADER_VERSION_MAX; + uint32_t render_pass = 0; + uint32_t wireframe = false; + uint32_t ubershader = false; + + uint32_t hash() const { + uint32_t h = hash_murmur3_one_32(vertex_format_id); + h = hash_murmur3_one_32(framebuffer_format_id, h); + h = hash_murmur3_one_32(cull_mode, h); + h = hash_murmur3_one_32(primitive_type, h); + h = hash_murmur3_one_32(shader_specialization.packed_0, h); + h = hash_murmur3_one_float(shader_specialization.packed_1, h); + h = hash_murmur3_one_32(shader_specialization.packed_2, h); + h = hash_murmur3_one_32(version, h); + h = hash_murmur3_one_32(render_pass, h); + h = hash_murmur3_one_32(wireframe, h); + h = hash_murmur3_one_32(ubershader, h); + return hash_fmix32(h); + } + }; + + void _create_pipeline(PipelineKey p_pipeline_key); + PipelineHashMapRD pipeline_hash_map; + RID version; - uint64_t 
vertex_input_mask = 0; - PipelineCacheRD pipelines[CULL_VARIANT_MAX][RS::PRIMITIVE_MAX][SHADER_VERSION_MAX]; + + static const uint32_t VERTEX_INPUT_MASKS_SIZE = SHADER_VERSION_MAX * 2; + std::atomic vertex_input_masks[VERTEX_INPUT_MASKS_SIZE] = {}; Vector texture_uniforms; @@ -111,6 +179,11 @@ class SceneShaderForwardMobile { DepthDraw depth_draw; DepthTest depth_test; + int blend_mode = BLEND_MODE_MIX; + int depth_testi = DEPTH_TEST_ENABLED; + int alpha_antialiasing_mode = ALPHA_ANTIALIASING_OFF; + int cull_mode = CULL_BACK; + bool uses_point_size = false; bool uses_alpha = false; bool uses_blend_alpha = false; @@ -122,6 +195,8 @@ class SceneShaderForwardMobile { bool uses_normal = false; bool uses_tangent = false; bool uses_particle_trails = false; + bool uses_normal_map = false; + bool wireframe = false; bool unshaded = false; bool uses_vertex = false; @@ -140,10 +215,35 @@ class SceneShaderForwardMobile { uint64_t last_pass = 0; uint32_t index = 0; + _FORCE_INLINE_ bool uses_alpha_pass() const { + bool has_read_screen_alpha = uses_screen_texture || uses_depth_texture || uses_normal_texture; + bool has_base_alpha = (uses_alpha && (!uses_alpha_clip || uses_alpha_antialiasing)); + bool has_blend_alpha = uses_blend_alpha; + bool has_alpha = has_base_alpha || has_blend_alpha; + bool no_depth_draw = depth_draw == DEPTH_DRAW_DISABLED; + bool no_depth_test = depth_test == DEPTH_TEST_DISABLED; + return has_alpha || has_read_screen_alpha || no_depth_draw || no_depth_test; + } + + _FORCE_INLINE_ bool uses_depth_in_alpha_pass() const { + bool no_depth_draw = depth_draw == DEPTH_DRAW_DISABLED; + bool no_depth_test = depth_test == DEPTH_TEST_DISABLED; + return (uses_depth_prepass_alpha || uses_alpha_antialiasing) && !(no_depth_draw || no_depth_test); + } + + _FORCE_INLINE_ bool uses_shared_shadow_material() const { + return !uses_particle_trails && !writes_modelview_or_projection && !uses_vertex && !uses_discard && !uses_depth_prepass_alpha && !uses_alpha_clip && 
!uses_alpha_antialiasing && !uses_world_coordinates; + } + virtual void set_code(const String &p_Code); virtual bool is_animated() const; virtual bool casts_shadows() const; virtual RS::ShaderNativeSourceCode get_native_source_code() const; + RD::PolygonCullMode get_cull_mode_from_cull_variant(CullVariant p_cull_variant); + void _clear_vertex_input_mask_cache(); + RID get_shader_variant(ShaderVersion p_shader_version, bool p_ubershader) const; + uint64_t get_vertex_input_mask(ShaderVersion p_shader_version, bool p_ubershader); + bool is_valid() const; SelfList shader_list_element; @@ -204,10 +304,14 @@ class SceneShaderForwardMobile { SceneShaderForwardMobile(); ~SceneShaderForwardMobile(); - Vector default_specialization_constants; + ShaderSpecialization default_specialization = {}; + + uint32_t pipeline_compilations[RS::PIPELINE_SOURCE_MAX] = {}; void init(const String p_defines); - void set_default_specialization_constants(const Vector &p_constants); + void set_default_specialization(const ShaderSpecialization &p_specialization); + uint32_t get_pipeline_compilations(RS::PipelineSource p_source); + bool is_multiview_enabled() const; }; } // namespace RendererSceneRenderImplementation diff --git a/servers/rendering/renderer_rd/pipeline_cache_rd.cpp b/servers/rendering/renderer_rd/pipeline_cache_rd.cpp index c00e5f8b5ebe..ed0314381220 100644 --- a/servers/rendering/renderer_rd/pipeline_cache_rd.cpp +++ b/servers/rendering/renderer_rd/pipeline_cache_rd.cpp @@ -89,7 +89,6 @@ void PipelineCacheRD::setup(RID p_shader, RD::RenderPrimitive p_primitive, const ERR_FAIL_COND(p_shader.is_null()); _clear(); shader = p_shader; - input_mask = 0; render_primitive = p_primitive; rasterization_state = p_rasterization_state; multisample_state = p_multisample; @@ -112,13 +111,11 @@ void PipelineCacheRD::update_shader(RID p_shader) { void PipelineCacheRD::clear() { _clear(); shader = RID(); //clear shader - input_mask = 0; } PipelineCacheRD::PipelineCacheRD() { version_count = 0; 
versions = nullptr; - input_mask = 0; } PipelineCacheRD::~PipelineCacheRD() { diff --git a/servers/rendering/renderer_rd/pipeline_cache_rd.h b/servers/rendering/renderer_rd/pipeline_cache_rd.h index 0ebebd054031..64e6b5078a7b 100644 --- a/servers/rendering/renderer_rd/pipeline_cache_rd.h +++ b/servers/rendering/renderer_rd/pipeline_cache_rd.h @@ -38,7 +38,6 @@ class PipelineCacheRD { SpinLock spin_lock; RID shader; - uint64_t input_mask; RD::RenderPrimitive render_primitive; RD::PipelineRasterizationState rasterization_state; @@ -92,11 +91,8 @@ class PipelineCacheRD { } _FORCE_INLINE_ uint64_t get_vertex_input_mask() { - if (input_mask == 0) { - ERR_FAIL_COND_V(shader.is_null(), 0); - input_mask = RD::get_singleton()->shader_get_vertex_input_attribute_mask(shader); - } - return input_mask; + ERR_FAIL_COND_V(shader.is_null(), 0); + return RD::get_singleton()->shader_get_vertex_input_attribute_mask(shader); } void clear(); PipelineCacheRD(); diff --git a/servers/rendering/renderer_rd/pipeline_hash_map_rd.h b/servers/rendering/renderer_rd/pipeline_hash_map_rd.h new file mode 100644 index 000000000000..b76f8ae5e699 --- /dev/null +++ b/servers/rendering/renderer_rd/pipeline_hash_map_rd.h @@ -0,0 +1,218 @@ +/**************************************************************************/ +/* pipeline_hash_map_rd.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#ifndef PIPELINE_HASH_MAP_RD_H +#define PIPELINE_HASH_MAP_RD_H + +#include "servers/rendering/rendering_device.h" +#include "servers/rendering_server.h" + +#define PRINT_PIPELINE_COMPILATION_KEYS 0 + +template +class PipelineHashMapRD { +private: + CreationClass *creation_object = nullptr; + CreationFunction creation_function = nullptr; + Mutex *compilations_mutex = nullptr; + uint32_t *compilations = nullptr; + RBMap hash_map; + LocalVector> compiled_queue; + Mutex compiled_queue_mutex; + HashMap compilation_tasks; + Mutex local_mutex; + + bool _add_new_pipelines_to_map() { + thread_local Vector hashes_added; + hashes_added.clear(); + + { + MutexLock lock(compiled_queue_mutex); + for (const Pair &pair : compiled_queue) { + hash_map[pair.first] = pair.second; + hashes_added.push_back(pair.first); + } + + compiled_queue.clear(); + } + + { + MutexLock local_lock(local_mutex); + for (uint32_t hash : hashes_added) { + HashMap::Iterator task_it = compilation_tasks.find(hash); + if (task_it != compilation_tasks.end()) { + compilation_tasks.remove(task_it); + } + } + } + + return !hashes_added.is_empty(); + } + + void _wait_for_compilation() { + MutexLock local_lock(local_mutex); + for (KeyValue key_value : compilation_tasks) { + WorkerThreadPool::get_singleton()->wait_for_task_completion(key_value.value); + } + } + +public: + void add_compiled_pipeline(uint32_t p_hash, RID p_pipeline) { + compiled_queue_mutex.lock(); + compiled_queue.push_back({ p_hash, p_pipeline }); + compiled_queue_mutex.unlock(); + } + + // Start compilation of a pipeline ahead of time in the background. Returns true if the compilation was started, false if it wasn't required. Source is only used for collecting statistics. 
+ bool compile_pipeline(const Key &p_key, uint32_t p_key_hash, RS::PipelineSource p_source) { + DEV_ASSERT((creation_object != nullptr) && (creation_function != nullptr) && "Creation object and function was not set before attempting to compile a pipeline."); + + // Check if the pipeline was already compiled. + compiled_queue_mutex.lock(); + bool already_exists = hash_map.has(p_key_hash); + compiled_queue_mutex.unlock(); + if (already_exists) { + return false; + } + + // Check if the pipeline is currently being compiled. + MutexLock local_lock(local_mutex); + if (compilation_tasks.has(p_key_hash)) { + return false; + } + + if (compilations_mutex != nullptr) { + MutexLock compilations_lock(*compilations_mutex); + compilations[p_source]++; + } + +#if PRINT_PIPELINE_COMPILATION_KEYS + String source_name = "UNKNOWN"; + switch (p_source) { + case RS::PIPELINE_SOURCE_CANVAS: + source_name = "CANVAS"; + break; + case RS::PIPELINE_SOURCE_MESH: + source_name = "MESH"; + break; + case RS::PIPELINE_SOURCE_SURFACE: + source_name = "SURFACE"; + break; + case RS::PIPELINE_SOURCE_DRAW: + source_name = "DRAW"; + break; + case RS::PIPELINE_SOURCE_SPECIALIZATION: + source_name = "SPECIALIZATION"; + break; + } + + print_line("HASH:", p_key_hash, "SOURCE:", source_name); +#endif + + // Queue a background compilation task. + WorkerThreadPool::TaskID task_id = WorkerThreadPool::get_singleton()->add_template_task(creation_object, creation_function, p_key, false, "PipelineCompilation"); + compilation_tasks.insert(p_key_hash, task_id); + + return true; + } + + // Retrieve a pipeline. It'll return an empty pipeline if it's not available yet, but it'll be guaranteed to succeed if 'wait for compilation' is true and stall as necessary. Source is just an optional number to aid debugging. 
+	RID get_pipeline(const Key &p_key, uint32_t p_key_hash, bool p_wait_for_compilation, RS::PipelineSource p_source) {
+		RBMap<uint32_t, RID>::Element *e = hash_map.find(p_key_hash);
+
+		// On a miss, check if there are any newly compiled pipelines to merge into the map and try again.
+		// Merging takes a mutex lock, so it is only attempted when the first lookup fails.
+		if (e == nullptr && _add_new_pipelines_to_map()) {
+			e = hash_map.find(p_key_hash);
+		}
+
+		if (e != nullptr) {
+			return e->value();
+		}
+
+		// Lock access to the compilation maps.
+		MutexLock local_lock(local_mutex);
+
+		// Request compilation. The method will ignore the request if it's already being compiled.
+		compile_pipeline(p_key, p_key_hash, p_source);
+
+		if (!p_wait_for_compilation) {
+			// The caller does not want to stall: report the pipeline as not available yet.
+			return RID();
+		}
+
+		if (compilation_tasks.has(p_key_hash)) {
+			// If a background compilation task was used, wait for it.
+			WorkerThreadPool::get_singleton()->wait_for_task_completion(compilation_tasks[p_key_hash]);
+		}
+
+		_add_new_pipelines_to_map();
+
+		e = hash_map.find(p_key_hash);
+		if (e != nullptr) {
+			return e->value();
+		}
+
+		// Pipeline could not be compiled due to an internal error. Store an empty RID so compilation is not attempted again.
+		hash_map[p_key_hash] = RID();
+		return RID();
+	}
+
+	// Delete all cached pipelines. Can stall if background compilation is in progress.
+	void clear_pipelines() {
+		_wait_for_compilation();
+		_add_new_pipelines_to_map();
+
+		for (const KeyValue<uint32_t, RID> &entry : hash_map) {
+			// Failed compilations are cached as empty RIDs by get_pipeline(); there is nothing to free for those.
+			if (entry.value.is_valid()) {
+				RD::get_singleton()->free(entry.value);
+			}
+		}
+
+		hash_map.clear();
+	}
+
+	// Set the external pipeline compilations array to increase the counters on every time a pipeline is compiled.
+ void set_compilations(uint32_t *p_compilations, Mutex *p_compilations_mutex) { + compilations = p_compilations; + compilations_mutex = p_compilations_mutex; + } + + void set_creation_object_and_function(CreationClass *p_creation_object, CreationFunction p_creation_function) { + creation_object = p_creation_object; + creation_function = p_creation_function; + } + + PipelineHashMapRD() {} + + ~PipelineHashMapRD() { + clear_pipelines(); + } +}; + +#endif // PIPELINE_HASH_MAP_RD_H diff --git a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp index 57497eb20701..22284f2fae60 100644 --- a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp @@ -351,6 +351,23 @@ void RendererCanvasRenderRD::free_polygon(PolygonID p_polygon) { //////////////////// +static RD::RenderPrimitive _primitive_type_to_render_primitive(RS::PrimitiveType p_primitive) { + switch (p_primitive) { + case RS::PRIMITIVE_POINTS: + return RD::RENDER_PRIMITIVE_POINTS; + case RS::PRIMITIVE_LINES: + return RD::RENDER_PRIMITIVE_LINES; + case RS::PRIMITIVE_LINE_STRIP: + return RD::RENDER_PRIMITIVE_LINESTRIPS; + case RS::PRIMITIVE_TRIANGLES: + return RD::RENDER_PRIMITIVE_TRIANGLES; + case RS::PRIMITIVE_TRIANGLE_STRIP: + return RD::RENDER_PRIMITIVE_TRIANGLE_STRIPS; + default: + return RD::RENDER_PRIMITIVE_MAX; + } +} + _FORCE_INLINE_ static uint32_t _indices_to_primitives(RS::PrimitiveType p_primitive, uint32_t p_indices) { static const uint32_t divisor[RS::PRIMITIVE_MAX] = { 1, 2, 1, 3, 1 }; static const uint32_t subtractor[RS::PRIMITIVE_MAX] = { 0, 0, 1, 0, 1 }; @@ -450,6 +467,42 @@ RID RendererCanvasRenderRD::_create_base_uniform_set(RID p_to_render_target, boo return uniform_set; } +RID RendererCanvasRenderRD::_get_pipeline_specialization_or_ubershader(CanvasShaderData *p_shader_data, PipelineKey &r_pipeline_key, PushConstant &r_push_constant, RID p_mesh_instance, 
void *p_surface, uint32_t p_surface_index, RID *r_vertex_array) { + r_pipeline_key.ubershader = 0; + + const uint32_t ubershader_iterations = 1; + while (r_pipeline_key.ubershader < ubershader_iterations) { + if (r_vertex_array != nullptr) { + RendererRD::MeshStorage *mesh_storage = RendererRD::MeshStorage::get_singleton(); + uint64_t input_mask = p_shader_data->get_vertex_input_mask(r_pipeline_key.variant, r_pipeline_key.ubershader); + if (p_mesh_instance.is_valid()) { + mesh_storage->mesh_instance_surface_get_vertex_arrays_and_format(p_mesh_instance, p_surface_index, input_mask, false, *r_vertex_array, r_pipeline_key.vertex_format_id); + } else { + mesh_storage->mesh_surface_get_vertex_arrays_and_format(p_surface, input_mask, false, *r_vertex_array, r_pipeline_key.vertex_format_id); + } + } + + if (r_pipeline_key.ubershader) { + r_push_constant.shader_specialization = r_pipeline_key.shader_specialization; + r_pipeline_key.shader_specialization = {}; + } else { + r_push_constant.shader_specialization = {}; + } + + bool wait_for_compilation = r_pipeline_key.ubershader || ubershader_iterations == 1; + RS::PipelineSource source = RS::PIPELINE_SOURCE_CANVAS; + RID pipeline = p_shader_data->pipeline_hash_map.get_pipeline(r_pipeline_key, r_pipeline_key.hash(), wait_for_compilation, source); + if (pipeline.is_valid()) { + return pipeline; + } + + r_pipeline_key.ubershader++; + } + + // This case should never be reached unless the shader wasn't available. 
+ return RID(); +} + void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_light_list, const Transform2D &p_canvas_transform, RenderingServer::CanvasItemTextureFilter p_default_filter, RenderingServer::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used, RenderingMethod::RenderInfo *r_render_info) { RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton(); RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); @@ -717,7 +770,7 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p if (material.is_valid()) { CanvasMaterialData *md = static_cast<CanvasMaterialData *>(material_storage->material_get_data(material, RendererRD::MaterialStorage::SHADER_TYPE_2D)); - if (md && md->shader_data->valid) { + if (md && md->shader_data->is_valid()) { if (md->shader_data->uses_screen_texture && canvas_group_owner == nullptr) { if (!material_screen_texture_cached) { backbuffer_copy = true; @@ -1355,17 +1408,72 @@ void RendererCanvasRenderRD::occluder_polygon_set_cull_mode(RID p_occluder, RS:: oc->cull_mode = p_mode; } +void RendererCanvasRenderRD::CanvasShaderData::_clear_vertex_input_mask_cache() { + for (uint32_t i = 0; i < VERTEX_INPUT_MASKS_SIZE; i++) { + vertex_input_masks[i].store(0); + } +} + +void RendererCanvasRenderRD::CanvasShaderData::_create_pipeline(PipelineKey p_pipeline_key) { +#if PRINT_PIPELINE_COMPILATION_KEYS + print_line( + "HASH:", p_pipeline_key.hash(), + "VERSION:", version, + "VARIANT:", p_pipeline_key.variant, + "FRAMEBUFFER:", p_pipeline_key.framebuffer_format_id, + "VERTEX:", p_pipeline_key.vertex_format_id, + "PRIMITIVE:", p_pipeline_key.render_primitive, + "SPEC PACKED #0:", p_pipeline_key.shader_specialization.packed_0, + "LCD:", p_pipeline_key.lcd_blend); +#endif + + RendererRD::MaterialStorage::ShaderData::BlendMode 
blend_mode_rd = RendererRD::MaterialStorage::ShaderData::BlendMode(blend_mode); + RD::PipelineColorBlendState blend_state; + RD::PipelineColorBlendState::Attachment attachment; + uint32_t dynamic_state_flags = 0; + if (p_pipeline_key.lcd_blend) { + attachment.enable_blend = true; + attachment.alpha_blend_op = RD::BLEND_OP_ADD; + attachment.color_blend_op = RD::BLEND_OP_ADD; + attachment.src_color_blend_factor = RD::BLEND_FACTOR_CONSTANT_COLOR; + attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_COLOR; + attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + dynamic_state_flags = RD::DYNAMIC_STATE_BLEND_CONSTANTS; + } else { + attachment = RendererRD::MaterialStorage::ShaderData::blend_mode_to_blend_attachment(blend_mode_rd); + } + + blend_state.attachments.push_back(attachment); + + // Convert the specialization from the key to pipeline specialization constants. + Vector<RD::PipelineSpecializationConstant> specialization_constants; + RD::PipelineSpecializationConstant sc; + sc.constant_id = 0; + sc.int_value = p_pipeline_key.shader_specialization.packed_0; + sc.type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT; + specialization_constants.push_back(sc); + + RID shader_rid = get_shader(p_pipeline_key.variant, p_pipeline_key.ubershader); + ERR_FAIL_COND(shader_rid.is_null()); + + RID pipeline = RD::get_singleton()->render_pipeline_create(shader_rid, p_pipeline_key.framebuffer_format_id, p_pipeline_key.vertex_format_id, p_pipeline_key.render_primitive, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, dynamic_state_flags, 0, specialization_constants); + ERR_FAIL_COND(pipeline.is_null()); + + pipeline_hash_map.add_compiled_pipeline(p_pipeline_key.hash(), pipeline); +} + void RendererCanvasRenderRD::CanvasShaderData::set_code(const String &p_code) { //compile code = p_code; - valid = false; ubo_size = 0; uniforms.clear(); uses_screen_texture = 
false; uses_screen_texture_mipmaps = false; uses_sdf = false; uses_time = false; + _clear_vertex_input_mask_cache(); if (code.is_empty()) { return; //just invalid, but no error @@ -1373,7 +1481,7 @@ void RendererCanvasRenderRD::CanvasShaderData::set_code(const String &p_code) { ShaderCompiler::GeneratedCode gen_code; - int blend_mode = BLEND_MODE_MIX; + blend_mode = BLEND_MODE_MIX; ShaderCompiler::IdentifierActions actions; actions.entry_point_stages["vertex"] = ShaderCompiler::STAGE_VERTEX; @@ -1384,7 +1492,7 @@ void RendererCanvasRenderRD::CanvasShaderData::set_code(const String &p_code) { actions.render_mode_values["blend_mix"] = Pair<int *, int>(&blend_mode, BLEND_MODE_MIX); actions.render_mode_values["blend_sub"] = Pair<int *, int>(&blend_mode, BLEND_MODE_SUB); actions.render_mode_values["blend_mul"] = Pair<int *, int>(&blend_mode, BLEND_MODE_MUL); - actions.render_mode_values["blend_premul_alpha"] = Pair<int *, int>(&blend_mode, BLEND_MODE_PMALPHA); + actions.render_mode_values["blend_premul_alpha"] = Pair<int *, int>(&blend_mode, BLEND_MODE_PREMULTIPLIED_ALPHA); actions.render_mode_values["blend_disabled"] = Pair<int *, int>(&blend_mode, BLEND_MODE_DISABLED); actions.usage_flag_pointers["texture_sdf"] = &uses_sdf; @@ -1393,6 +1501,7 @@ void RendererCanvasRenderRD::CanvasShaderData::set_code(const String &p_code) { actions.uniforms = &uniforms; RendererCanvasRenderRD *canvas_singleton = static_cast<RendererCanvasRenderRD *>(RendererCanvasRender::singleton); + MutexLock lock(canvas_singleton->shader.mutex); Error err = canvas_singleton->shader.compiler.compile(RS::SHADER_CANVAS_ITEM, code, &actions, path, gen_code); ERR_FAIL_COND_MSG(err != OK, "Shader compilation failed."); @@ -1400,6 +1509,8 @@ void RendererCanvasRenderRD::CanvasShaderData::set_code(const String &p_code) { uses_screen_texture_mipmaps = gen_code.uses_screen_texture_mipmaps; uses_screen_texture = gen_code.uses_screen_texture; + pipeline_hash_map.clear_pipelines(); + if (version.is_null()) { version = canvas_singleton->shader.canvas_shader.version_create(); } @@ -1422,168 +1533,70 @@ void 
RendererCanvasRenderRD::CanvasShaderData::set_code(const String &p_code) { print_line("\n**fragment_globals:\n" + gen_code.stage_globals[ShaderCompiler::STAGE_FRAGMENT]); #endif canvas_singleton->shader.canvas_shader.version_set_code(version, gen_code.code, gen_code.uniforms, gen_code.stage_globals[ShaderCompiler::STAGE_VERTEX], gen_code.stage_globals[ShaderCompiler::STAGE_FRAGMENT], gen_code.defines); - ERR_FAIL_COND(!canvas_singleton->shader.canvas_shader.version_is_valid(version)); ubo_size = gen_code.uniform_total_size; ubo_offsets = gen_code.uniform_offsets; texture_uniforms = gen_code.texture_uniforms; +} - //update them pipelines - - RD::PipelineColorBlendState::Attachment attachment; - - switch (blend_mode) { - case BLEND_MODE_DISABLED: { - // nothing to do here, disabled by default - - } break; - case BLEND_MODE_MIX: { - attachment.enable_blend = true; - attachment.color_blend_op = RD::BLEND_OP_ADD; - attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - - attachment.alpha_blend_op = RD::BLEND_OP_ADD; - attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - - } break; - case BLEND_MODE_ADD: { - attachment.enable_blend = true; - attachment.alpha_blend_op = RD::BLEND_OP_ADD; - attachment.color_blend_op = RD::BLEND_OP_ADD; - attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE; - attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - - } break; - case BLEND_MODE_SUB: { - attachment.enable_blend = true; - attachment.alpha_blend_op = RD::BLEND_OP_REVERSE_SUBTRACT; - attachment.color_blend_op = RD::BLEND_OP_REVERSE_SUBTRACT; - attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE; - 
attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE; +bool RendererCanvasRenderRD::CanvasShaderData::is_animated() const { + return false; +} - } break; - case BLEND_MODE_MUL: { - attachment.enable_blend = true; - attachment.alpha_blend_op = RD::BLEND_OP_ADD; - attachment.color_blend_op = RD::BLEND_OP_ADD; - attachment.src_color_blend_factor = RD::BLEND_FACTOR_DST_COLOR; - attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ZERO; - attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_DST_ALPHA; - attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ZERO; +bool RendererCanvasRenderRD::CanvasShaderData::casts_shadows() const { + return false; +} - } break; - case BLEND_MODE_PMALPHA: { - attachment.enable_blend = true; - attachment.alpha_blend_op = RD::BLEND_OP_ADD; - attachment.color_blend_op = RD::BLEND_OP_ADD; - attachment.src_color_blend_factor = RD::BLEND_FACTOR_ONE; - attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; +RS::ShaderNativeSourceCode RendererCanvasRenderRD::CanvasShaderData::get_native_source_code() const { + RendererCanvasRenderRD *canvas_singleton = static_cast<RendererCanvasRenderRD *>(RendererCanvasRender::singleton); + MutexLock lock(canvas_singleton->shader.mutex); + return canvas_singleton->shader.canvas_shader.version_get_native_source_code(version); +} - } break; +RID RendererCanvasRenderRD::CanvasShaderData::get_shader(ShaderVariant p_shader_variant, bool p_ubershader) const { + if (version.is_valid()) { + uint32_t variant_index = p_shader_variant + (p_ubershader ? 
SHADER_VARIANT_MAX : 0); + RendererCanvasRenderRD *canvas_singleton = static_cast<RendererCanvasRenderRD *>(RendererCanvasRender::singleton); + MutexLock lock(canvas_singleton->shader.mutex); + return canvas_singleton->shader.canvas_shader.version_get_shader(version, variant_index); + } else { + return RID(); } +} - RD::PipelineColorBlendState blend_state; - blend_state.attachments.push_back(attachment); +uint64_t RendererCanvasRenderRD::CanvasShaderData::get_vertex_input_mask(ShaderVariant p_shader_variant, bool p_ubershader) { + // Vertex input masks require knowledge of the shader. Since querying the shader can be expensive due to high contention and the necessary mutex, we cache the result instead. + uint32_t input_mask_index = p_shader_variant + (p_ubershader ? SHADER_VARIANT_MAX : 0); + uint64_t input_mask = vertex_input_masks[input_mask_index].load(std::memory_order_relaxed); + if (input_mask == 0) { + RID shader_rid = get_shader(p_shader_variant, p_ubershader); + ERR_FAIL_COND_V(shader_rid.is_null(), 0); - RD::PipelineColorBlendState::Attachment attachment_lcd; - attachment_lcd.enable_blend = true; - attachment_lcd.alpha_blend_op = RD::BLEND_OP_ADD; - attachment_lcd.color_blend_op = RD::BLEND_OP_ADD; - attachment_lcd.src_color_blend_factor = RD::BLEND_FACTOR_CONSTANT_COLOR; - attachment_lcd.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_COLOR; - attachment_lcd.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - attachment_lcd.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - - RD::PipelineColorBlendState blend_state_lcd; - blend_state_lcd.attachments.push_back(attachment_lcd); - - //update pipelines - - for (int i = 0; i < PIPELINE_LIGHT_MODE_MAX; i++) { - for (int j = 0; j < PIPELINE_VARIANT_MAX; j++) { - RD::RenderPrimitive primitive[PIPELINE_VARIANT_MAX] = { - RD::RENDER_PRIMITIVE_TRIANGLES, - RD::RENDER_PRIMITIVE_TRIANGLES, - RD::RENDER_PRIMITIVE_TRIANGLES, - RD::RENDER_PRIMITIVE_LINES, - RD::RENDER_PRIMITIVE_POINTS, - RD::RENDER_PRIMITIVE_TRIANGLES, - 
RD::RENDER_PRIMITIVE_TRIANGLE_STRIPS, - RD::RENDER_PRIMITIVE_LINES, - RD::RENDER_PRIMITIVE_LINESTRIPS, - RD::RENDER_PRIMITIVE_POINTS, - RD::RENDER_PRIMITIVE_TRIANGLES, - }; - - ShaderVariant shader_variants[PIPELINE_LIGHT_MODE_MAX][PIPELINE_VARIANT_MAX] = { - { - //non lit - SHADER_VARIANT_QUAD, - SHADER_VARIANT_NINEPATCH, - SHADER_VARIANT_PRIMITIVE, - SHADER_VARIANT_PRIMITIVE, - SHADER_VARIANT_PRIMITIVE_POINTS, - SHADER_VARIANT_ATTRIBUTES, - SHADER_VARIANT_ATTRIBUTES, - SHADER_VARIANT_ATTRIBUTES, - SHADER_VARIANT_ATTRIBUTES, - SHADER_VARIANT_ATTRIBUTES_POINTS, - SHADER_VARIANT_QUAD, - }, - { - //lit - SHADER_VARIANT_QUAD_LIGHT, - SHADER_VARIANT_NINEPATCH_LIGHT, - SHADER_VARIANT_PRIMITIVE_LIGHT, - SHADER_VARIANT_PRIMITIVE_LIGHT, - SHADER_VARIANT_PRIMITIVE_POINTS_LIGHT, - SHADER_VARIANT_ATTRIBUTES_LIGHT, - SHADER_VARIANT_ATTRIBUTES_LIGHT, - SHADER_VARIANT_ATTRIBUTES_LIGHT, - SHADER_VARIANT_ATTRIBUTES_LIGHT, - SHADER_VARIANT_ATTRIBUTES_POINTS_LIGHT, - SHADER_VARIANT_QUAD_LIGHT, - }, - }; - - RID shader_variant = canvas_singleton->shader.canvas_shader.version_get_shader(version, shader_variants[i][j]); - if (j == PIPELINE_VARIANT_QUAD_LCD_BLEND) { - pipeline_variants.variants[i][j].setup(shader_variant, primitive[j], RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state_lcd, RD::DYNAMIC_STATE_BLEND_CONSTANTS); - } else { - pipeline_variants.variants[i][j].setup(shader_variant, primitive[j], RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0); - } - } + input_mask = RD::get_singleton()->shader_get_vertex_input_attribute_mask(shader_rid); + vertex_input_masks[input_mask_index].store(input_mask, std::memory_order_relaxed); } - valid = true; + return input_mask; } -bool RendererCanvasRenderRD::CanvasShaderData::is_animated() const { - return false; -} - -bool RendererCanvasRenderRD::CanvasShaderData::casts_shadows() const { - return false; +bool 
RendererCanvasRenderRD::CanvasShaderData::is_valid() const { + RendererCanvasRenderRD *canvas_singleton = static_cast<RendererCanvasRenderRD *>(RendererCanvasRender::singleton); + MutexLock lock(canvas_singleton->shader.mutex); + return canvas_singleton->shader.canvas_shader.version_is_valid(version); } -RS::ShaderNativeSourceCode RendererCanvasRenderRD::CanvasShaderData::get_native_source_code() const { +RendererCanvasRenderRD::CanvasShaderData::CanvasShaderData() { RendererCanvasRenderRD *canvas_singleton = static_cast<RendererCanvasRenderRD *>(RendererCanvasRender::singleton); - return canvas_singleton->shader.canvas_shader.version_get_native_source_code(version); + pipeline_hash_map.set_creation_object_and_function(this, &CanvasShaderData::_create_pipeline); + pipeline_hash_map.set_compilations(&canvas_singleton->shader.pipeline_compilations[0], &canvas_singleton->shader.mutex); } RendererCanvasRenderRD::CanvasShaderData::~CanvasShaderData() { - RendererCanvasRenderRD *canvas_singleton = static_cast<RendererCanvasRenderRD *>(RendererCanvasRender::singleton); - ERR_FAIL_NULL(canvas_singleton); - //pipeline variants will clear themselves if shader is gone + pipeline_hash_map.clear_pipelines(); + if (version.is_valid()) { + RendererCanvasRenderRD *canvas_singleton = static_cast<RendererCanvasRenderRD *>(RendererCanvasRender::singleton); + MutexLock lock(canvas_singleton->shader.mutex); canvas_singleton->shader.canvas_shader.version_free(version); } } @@ -1595,8 +1608,10 @@ RendererRD::MaterialStorage::ShaderData *RendererCanvasRenderRD::_create_shader_ bool RendererCanvasRenderRD::CanvasMaterialData::update_parameters(const HashMap<StringName, Variant> &p_parameters, bool p_uniform_dirty, bool p_textures_dirty) { RendererCanvasRenderRD *canvas_singleton = static_cast<RendererCanvasRenderRD *>(RendererCanvasRender::singleton); - bool uniform_set_changed = update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set, 
canvas_singleton->shader.canvas_shader.version_get_shader(shader_data->version, 0), MATERIAL_UNIFORM_SET, true, false); - bool uniform_set_srgb_changed = update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set_srgb, canvas_singleton->shader.canvas_shader.version_get_shader(shader_data->version, 0), MATERIAL_UNIFORM_SET, false, false); + MutexLock lock(canvas_singleton->shader.mutex); + RID shader_to_update = canvas_singleton->shader.canvas_shader.version_get_shader(shader_data->version, 0); + bool uniform_set_changed = update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set, shader_to_update, MATERIAL_UNIFORM_SET, true, false); + bool uniform_set_srgb_changed = update_parameters_uniform_set(p_parameters, p_uniform_dirty, p_textures_dirty, shader_data->uniforms, shader_data->ubo_offsets.ptr(), shader_data->texture_uniforms, shader_data->default_texture_params, shader_data->ubo_size, uniform_set_srgb, shader_to_update, MATERIAL_UNIFORM_SET, false, false); return uniform_set_changed || uniform_set_srgb_changed; } @@ -1647,107 +1662,20 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() { state.light_uniforms = memnew_arr(LightUniform, state.max_lights_per_render); Vector<String> variants; - //non light variants - variants.push_back(""); //none by default is first variant - variants.push_back("#define USE_NINEPATCH\n"); //ninepatch is the second variant - variants.push_back("#define USE_PRIMITIVE\n"); //primitive is the third - variants.push_back("#define USE_PRIMITIVE\n#define USE_POINT_SIZE\n"); //points need point size - variants.push_back("#define USE_ATTRIBUTES\n"); // attributes for vertex arrays - 
variants.push_back("#define USE_ATTRIBUTES\n#define USE_POINT_SIZE\n"); //attributes with point size - //light variants - variants.push_back("#define USE_LIGHTING\n"); //none by default is first variant - variants.push_back("#define USE_LIGHTING\n#define USE_NINEPATCH\n"); //ninepatch is the second variant - variants.push_back("#define USE_LIGHTING\n#define USE_PRIMITIVE\n"); //primitive is the third - variants.push_back("#define USE_LIGHTING\n#define USE_PRIMITIVE\n#define USE_POINT_SIZE\n"); //points need point size - variants.push_back("#define USE_LIGHTING\n#define USE_ATTRIBUTES\n"); // attributes for vertex arrays - variants.push_back("#define USE_LIGHTING\n#define USE_ATTRIBUTES\n#define USE_POINT_SIZE\n"); //attributes with point size + const uint32_t ubershader_iterations = 1; + for (uint32_t ubershader = 0; ubershader < ubershader_iterations; ubershader++) { + const String base_define = ubershader ? "\n#define UBERSHADER\n" : ""; + variants.push_back(base_define + ""); // SHADER_VARIANT_QUAD + variants.push_back(base_define + "#define USE_NINEPATCH\n"); // SHADER_VARIANT_NINEPATCH + variants.push_back(base_define + "#define USE_PRIMITIVE\n"); // SHADER_VARIANT_PRIMITIVE + variants.push_back(base_define + "#define USE_ATTRIBUTES\n"); // SHADER_VARIANT_ATTRIBUTES + } shader.canvas_shader.initialize(variants, global_defines); - shader.default_version = shader.canvas_shader.version_create(); - shader.default_version_rd_shader = shader.canvas_shader.version_get_shader(shader.default_version, SHADER_VARIANT_QUAD); - - RD::PipelineColorBlendState blend_state; - RD::PipelineColorBlendState::Attachment blend_attachment; - - blend_attachment.enable_blend = true; - blend_attachment.color_blend_op = RD::BLEND_OP_ADD; - blend_attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; - blend_attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - - blend_attachment.alpha_blend_op = RD::BLEND_OP_ADD; - blend_attachment.src_alpha_blend_factor = 
RD::BLEND_FACTOR_ONE; - blend_attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - - blend_state.attachments.push_back(blend_attachment); - - RD::PipelineColorBlendState::Attachment attachment_lcd; - attachment_lcd.enable_blend = true; - attachment_lcd.alpha_blend_op = RD::BLEND_OP_ADD; - attachment_lcd.color_blend_op = RD::BLEND_OP_ADD; - attachment_lcd.src_color_blend_factor = RD::BLEND_FACTOR_CONSTANT_COLOR; - attachment_lcd.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_COLOR; - attachment_lcd.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - attachment_lcd.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - - RD::PipelineColorBlendState blend_state_lcd; - blend_state_lcd.attachments.push_back(attachment_lcd); - - for (int i = 0; i < PIPELINE_LIGHT_MODE_MAX; i++) { - for (int j = 0; j < PIPELINE_VARIANT_MAX; j++) { - RD::RenderPrimitive primitive[PIPELINE_VARIANT_MAX] = { - RD::RENDER_PRIMITIVE_TRIANGLES, - RD::RENDER_PRIMITIVE_TRIANGLES, - RD::RENDER_PRIMITIVE_TRIANGLES, - RD::RENDER_PRIMITIVE_LINES, - RD::RENDER_PRIMITIVE_POINTS, - RD::RENDER_PRIMITIVE_TRIANGLES, - RD::RENDER_PRIMITIVE_TRIANGLE_STRIPS, - RD::RENDER_PRIMITIVE_LINES, - RD::RENDER_PRIMITIVE_LINESTRIPS, - RD::RENDER_PRIMITIVE_POINTS, - RD::RENDER_PRIMITIVE_TRIANGLES, - }; - - ShaderVariant shader_variants[PIPELINE_LIGHT_MODE_MAX][PIPELINE_VARIANT_MAX] = { - { - //non lit - SHADER_VARIANT_QUAD, - SHADER_VARIANT_NINEPATCH, - SHADER_VARIANT_PRIMITIVE, - SHADER_VARIANT_PRIMITIVE, - SHADER_VARIANT_PRIMITIVE_POINTS, - SHADER_VARIANT_ATTRIBUTES, - SHADER_VARIANT_ATTRIBUTES, - SHADER_VARIANT_ATTRIBUTES, - SHADER_VARIANT_ATTRIBUTES, - SHADER_VARIANT_ATTRIBUTES_POINTS, - SHADER_VARIANT_QUAD, - }, - { - //lit - SHADER_VARIANT_QUAD_LIGHT, - SHADER_VARIANT_NINEPATCH_LIGHT, - SHADER_VARIANT_PRIMITIVE_LIGHT, - SHADER_VARIANT_PRIMITIVE_LIGHT, - SHADER_VARIANT_PRIMITIVE_POINTS_LIGHT, - SHADER_VARIANT_ATTRIBUTES_LIGHT, - SHADER_VARIANT_ATTRIBUTES_LIGHT, - 
SHADER_VARIANT_ATTRIBUTES_LIGHT, - SHADER_VARIANT_ATTRIBUTES_LIGHT, - SHADER_VARIANT_ATTRIBUTES_POINTS_LIGHT, - SHADER_VARIANT_QUAD_LIGHT, - }, - }; - - RID shader_variant = shader.canvas_shader.version_get_shader(shader.default_version, shader_variants[i][j]); - if (j == PIPELINE_VARIANT_QUAD_LCD_BLEND) { - shader.pipeline_variants.variants[i][j].setup(shader_variant, primitive[j], RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state_lcd, RD::DYNAMIC_STATE_BLEND_CONSTANTS); - } else { - shader.pipeline_variants.variants[i][j].setup(shader_variant, primitive[j], RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0); - } - } - } + shader.default_version_data.version = shader.canvas_shader.version_create(); + shader.default_version_data.blend_mode = RendererRD::MaterialStorage::ShaderData::BLEND_MODE_MIX; + shader.default_version_rd_shader = shader.default_version_data.get_shader(SHADER_VARIANT_QUAD, false); } { @@ -2095,6 +2023,12 @@ void RendererCanvasRenderRD::set_debug_redraw(bool p_enabled, double p_time, con debug_redraw_color = p_color; } +uint32_t RendererCanvasRenderRD::get_pipeline_compilations(RS::PipelineSource p_source) { + RendererCanvasRenderRD *canvas_singleton = static_cast<RendererCanvasRenderRD *>(RendererCanvasRender::singleton); + MutexLock lock(canvas_singleton->shader.mutex); + return shader.pipeline_compilations[p_source]; +} + void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool &r_sdf_used, bool p_to_backbuffer, RenderingMethod::RenderInfo *r_render_info) { // Record batches uint32_t instance_index = 0; @@ -2238,12 +2172,11 @@ void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target } } - PipelineVariants *pipeline_variants = &shader.pipeline_variants; - + CanvasShaderData *shader_data = 
&shader.default_version_data; CanvasMaterialData *material_data = current_batch->material_data; if (material_data) { - if (material_data->shader_data->version.is_valid() && material_data->shader_data->valid) { - pipeline_variants = &material_data->shader_data->pipeline_variants; + if (material_data->shader_data->version.is_valid() && material_data->shader_data->is_valid()) { + shader_data = material_data->shader_data; // Update uniform set. RID uniform_set = texture_storage->render_target_is_using_hdr(p_to_render_target.render_target) ? material_data->uniform_set : material_data->uniform_set_srgb; if (uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(uniform_set)) { // Material may not have a uniform set. @@ -2253,7 +2186,7 @@ void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target } } - _render_batch(draw_list, pipeline_variants, fb_format, p_lights, current_batch, r_render_info); + _render_batch(draw_list, shader_data, fb_format, p_lights, current_batch, r_render_info); } RD::get_singleton()->draw_list_end(); @@ -2285,7 +2218,6 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar uint32_t lights[4] = { 0, 0, 0, 0 }; uint16_t light_count = 0; - PipelineLightMode light_mode; { Light *light = p_lights; @@ -2307,11 +2239,11 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar base_flags |= light_count << FLAGS_LIGHT_COUNT_SHIFT; } - light_mode = (light_count > 0 || using_directional_lights) ? PIPELINE_LIGHT_MODE_ENABLED : PIPELINE_LIGHT_MODE_DISABLED; + bool use_lighting = (light_count > 0 || using_directional_lights); - if (light_mode != r_current_batch->light_mode) { + if (use_lighting != r_current_batch->use_lighting) { r_current_batch = _new_batch(r_batch_broken); - r_current_batch->light_mode = light_mode; + r_current_batch->use_lighting = use_lighting; } // new_instance_data should be called after the current_batch is set. 
@@ -2363,7 +2295,8 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar r_current_batch->command_type = Item::Command::TYPE_RECT; r_current_batch->command = c; // default variant - r_current_batch->pipeline_variant = PIPELINE_VARIANT_QUAD; + r_current_batch->shader_variant = SHADER_VARIANT_QUAD; + r_current_batch->render_primitive = RD::RENDER_PRIMITIVE_TRIANGLES; } if (bool(rect->flags & CANVAS_RECT_TILE)) { @@ -2391,7 +2324,8 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar r_current_batch = _new_batch(r_batch_broken); r_current_batch->has_blend = has_blend; r_current_batch->modulate = modulated; - r_current_batch->pipeline_variant = has_blend ? PIPELINE_VARIANT_QUAD_LCD_BLEND : PIPELINE_VARIANT_QUAD; + r_current_batch->shader_variant = SHADER_VARIANT_QUAD; + r_current_batch->render_primitive = RD::RENDER_PRIMITIVE_TRIANGLES; } InstanceData *instance_data = new_instance_data(); @@ -2480,7 +2414,8 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar r_current_batch->command_type = Item::Command::TYPE_NINEPATCH; r_current_batch->command = c; r_current_batch->has_blend = false; - r_current_batch->pipeline_variant = PipelineVariant::PIPELINE_VARIANT_NINEPATCH; + r_current_batch->shader_variant = SHADER_VARIANT_NINEPATCH; + r_current_batch->render_primitive = RD::RENDER_PRIMITIVE_TRIANGLES; } TextureState tex_state(np->texture, texture_filter, texture_repeat, false, use_linear_colors); @@ -2561,9 +2496,9 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar // pipeline variant { - static const PipelineVariant variant[RS::PRIMITIVE_MAX] = { PIPELINE_VARIANT_ATTRIBUTE_POINTS, PIPELINE_VARIANT_ATTRIBUTE_LINES, PIPELINE_VARIANT_ATTRIBUTE_LINES_STRIP, PIPELINE_VARIANT_ATTRIBUTE_TRIANGLES, PIPELINE_VARIANT_ATTRIBUTE_TRIANGLE_STRIP }; ERR_CONTINUE(polygon->primitive < 0 || polygon->primitive >= RS::PRIMITIVE_MAX); - r_current_batch->pipeline_variant 
= variant[polygon->primitive]; + r_current_batch->shader_variant = SHADER_VARIANT_ATTRIBUTES; + r_current_batch->render_primitive = _primitive_type_to_render_primitive(polygon->primitive); } InstanceData *instance_data = new_instance_data(); @@ -2591,9 +2526,24 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar r_current_batch->command = c; r_current_batch->primitive_points = primitive->point_count; - static const PipelineVariant variant[4] = { PIPELINE_VARIANT_PRIMITIVE_POINTS, PIPELINE_VARIANT_PRIMITIVE_LINES, PIPELINE_VARIANT_PRIMITIVE_TRIANGLES, PIPELINE_VARIANT_PRIMITIVE_TRIANGLES }; ERR_CONTINUE(primitive->point_count == 0 || primitive->point_count > 4); - r_current_batch->pipeline_variant = variant[primitive->point_count - 1]; + r_current_batch->shader_variant = SHADER_VARIANT_PRIMITIVE; + + switch (primitive->point_count) { + case 1: + r_current_batch->render_primitive = RD::RENDER_PRIMITIVE_POINTS; + break; + case 2: + r_current_batch->render_primitive = RD::RENDER_PRIMITIVE_LINES; + break; + case 3: + case 4: + r_current_batch->render_primitive = RD::RENDER_PRIMITIVE_TRIANGLES; + break; + default: + // Unknown point count. 
+ break; + }; TextureState tex_state(primitive->texture, texture_filter, texture_repeat, false, use_linear_colors); if (tex_state != r_current_batch->tex_state) { @@ -2789,7 +2739,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar } } -void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, PipelineVariants *p_pipeline_variants, RenderingDevice::FramebufferFormatID p_framebuffer_format, Light *p_lights, Batch const *p_batch, RenderingMethod::RenderInfo *r_render_info) { +void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasShaderData *p_shader_data, RenderingDevice::FramebufferFormatID p_framebuffer_format, Light *p_lights, Batch const *p_batch, RenderingMethod::RenderInfo *r_render_info) { UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); ERR_FAIL_NULL(uniform_set_cache); @@ -2797,17 +2747,25 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, PipelineV _bind_canvas_texture(p_draw_list, p_batch->tex_uniform_set); + RID pipeline; + PipelineKey pipeline_key; + PushConstant push_constant; + pipeline_key.framebuffer_format_id = p_framebuffer_format; + pipeline_key.variant = p_batch->shader_variant; + pipeline_key.render_primitive = p_batch->render_primitive; + pipeline_key.shader_specialization.use_lighting = p_batch->use_lighting; + pipeline_key.shader_specialization.use_point_size = pipeline_key.render_primitive == RD::RENDER_PRIMITIVE_POINTS; + pipeline_key.lcd_blend = p_batch->has_blend; + switch (p_batch->command_type) { case Item::Command::TYPE_RECT: case Item::Command::TYPE_NINEPATCH: { - RID pipeline = p_pipeline_variants->variants[p_batch->light_mode][p_batch->pipeline_variant].get_render_pipeline(RD::INVALID_ID, p_framebuffer_format); + pipeline = _get_pipeline_specialization_or_ubershader(p_shader_data, pipeline_key, push_constant); RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, pipeline); if (p_batch->has_blend) { - 
DEV_ASSERT(p_batch->pipeline_variant == PIPELINE_VARIANT_QUAD_LCD_BLEND); RD::get_singleton()->draw_list_set_blend_constants(p_draw_list, p_batch->modulate); } - PushConstant push_constant; push_constant.base_instance_index = p_batch->start; RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant)); @@ -2833,10 +2791,10 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, PipelineV PolygonBuffers *pb = polygon_buffers.polygons.getptr(polygon->polygon.polygon_id); ERR_FAIL_NULL(pb); - RID pipeline = p_pipeline_variants->variants[p_batch->light_mode][p_batch->pipeline_variant].get_render_pipeline(pb->vertex_format_id, p_framebuffer_format); + pipeline_key.vertex_format_id = pb->vertex_format_id; + pipeline = _get_pipeline_specialization_or_ubershader(p_shader_data, pipeline_key, push_constant); RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, pipeline); - PushConstant push_constant; push_constant.base_instance_index = p_batch->start; RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant)); @@ -2862,10 +2820,9 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, PipelineV case Item::Command::TYPE_PRIMITIVE: { const Item::CommandPrimitive *primitive = static_cast(p_batch->command); - RID pipeline = p_pipeline_variants->variants[p_batch->light_mode][p_batch->pipeline_variant].get_render_pipeline(RD::INVALID_ID, p_framebuffer_format); + pipeline = _get_pipeline_specialization_or_ubershader(p_shader_data, pipeline_key, push_constant); RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, pipeline); - PushConstant push_constant; push_constant.base_instance_index = p_batch->start; RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant)); @@ -2941,7 +2898,6 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, PipelineV INSTANCE_DATA_UNIFORM_SET); uint32_t 
surf_count = mesh_storage->mesh_get_surface_count(mesh); - static const PipelineVariant variant[RS::PRIMITIVE_MAX] = { PIPELINE_VARIANT_ATTRIBUTE_POINTS, PIPELINE_VARIANT_ATTRIBUTE_LINES, PIPELINE_VARIANT_ATTRIBUTE_LINES_STRIP, PIPELINE_VARIANT_ATTRIBUTE_TRIANGLES, PIPELINE_VARIANT_ATTRIBUTE_TRIANGLE_STRIP }; for (uint32_t j = 0; j < surf_count; j++) { void *surface = mesh_storage->mesh_get_surface(mesh, j); @@ -2949,7 +2905,7 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, PipelineV RS::PrimitiveType primitive = mesh_storage->mesh_surface_get_primitive(surface); ERR_CONTINUE(primitive < 0 || primitive >= RS::PRIMITIVE_MAX); - uint64_t input_mask = p_pipeline_variants->variants[p_batch->light_mode][variant[primitive]].get_vertex_input_mask(); + uint64_t input_mask = p_shader_data->get_vertex_input_mask(pipeline_key.variant, pipeline_key.ubershader); RID vertex_array; RD::VertexFormatID vertex_format = RD::INVALID_FORMAT_ID; @@ -2960,10 +2916,14 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, PipelineV mesh_storage->mesh_surface_get_vertex_arrays_and_format(surface, input_mask, false, vertex_array, vertex_format); } - RID pipeline = p_pipeline_variants->variants[p_batch->light_mode][variant[primitive]].get_render_pipeline(vertex_format, p_framebuffer_format); + pipeline_key.variant = SHADER_VARIANT_ATTRIBUTES; + pipeline_key.render_primitive = _primitive_type_to_render_primitive(primitive); + pipeline_key.shader_specialization.use_point_size = pipeline_key.render_primitive == RD::RENDER_PRIMITIVE_POINTS; + pipeline_key.vertex_format_id = vertex_format; + + pipeline = _get_pipeline_specialization_or_ubershader(p_shader_data, pipeline_key, push_constant); RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, pipeline); - PushConstant push_constant; push_constant.base_instance_index = p_batch->start; RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant)); @@ 
-3127,11 +3087,6 @@ RendererCanvasRenderRD::~RendererCanvasRenderRD() { //this will also automatically clear all pipelines RD::get_singleton()->free(state.shadow_sampler); } - //bindings - - //shaders - - shader.canvas_shader.version_free(shader.default_version); //buffers { diff --git a/servers/rendering/renderer_rd/renderer_canvas_render_rd.h b/servers/rendering/renderer_rd/renderer_canvas_render_rd.h index 8d90cd23ce25..fb9525aed004 100644 --- a/servers/rendering/renderer_rd/renderer_canvas_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_canvas_render_rd.h @@ -34,6 +34,7 @@ #include "servers/rendering/renderer_canvas_render.h" #include "servers/rendering/renderer_compositor.h" #include "servers/rendering/renderer_rd/pipeline_cache_rd.h" +#include "servers/rendering/renderer_rd/pipeline_hash_map_rd.h" #include "servers/rendering/renderer_rd/shaders/canvas.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl.gen.h" #include "servers/rendering/renderer_rd/storage_rd/material_storage.h" @@ -55,15 +56,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender { SHADER_VARIANT_QUAD, SHADER_VARIANT_NINEPATCH, SHADER_VARIANT_PRIMITIVE, - SHADER_VARIANT_PRIMITIVE_POINTS, SHADER_VARIANT_ATTRIBUTES, - SHADER_VARIANT_ATTRIBUTES_POINTS, - SHADER_VARIANT_QUAD_LIGHT, - SHADER_VARIANT_NINEPATCH_LIGHT, - SHADER_VARIANT_PRIMITIVE_LIGHT, - SHADER_VARIANT_PRIMITIVE_POINTS_LIGHT, - SHADER_VARIANT_ATTRIBUTES_LIGHT, - SHADER_VARIANT_ATTRIBUTES_POINTS_LIGHT, SHADER_VARIANT_MAX }; @@ -85,14 +78,14 @@ class RendererCanvasRenderRD : public RendererCanvasRender { FLAGS_NINEPATCH_V_MODE_SHIFT = 18, FLAGS_LIGHT_COUNT_SHIFT = 20, - FLAGS_DEFAULT_NORMAL_MAP_USED = (1 << 26), - FLAGS_DEFAULT_SPECULAR_MAP_USED = (1 << 27), + FLAGS_DEFAULT_NORMAL_MAP_USED = (1 << 24), + FLAGS_DEFAULT_SPECULAR_MAP_USED = (1 << 25), - FLAGS_USE_MSDF = (1 << 28), - FLAGS_USE_LCD = (1 << 29), + FLAGS_USE_MSDF = (1 << 26), + FLAGS_USE_LCD = (1 << 27), - FLAGS_FLIP_H = (1 << 
30), - FLAGS_FLIP_V = (1 << 31), + FLAGS_FLIP_H = (1 << 28), + FLAGS_FLIP_V = (1 << 29), }; enum { @@ -119,76 +112,82 @@ class RendererCanvasRenderRD : public RendererCanvasRender { /**** SHADER ****/ /****************/ - enum PipelineVariant { - PIPELINE_VARIANT_QUAD, - PIPELINE_VARIANT_NINEPATCH, - PIPELINE_VARIANT_PRIMITIVE_TRIANGLES, - PIPELINE_VARIANT_PRIMITIVE_LINES, - PIPELINE_VARIANT_PRIMITIVE_POINTS, - PIPELINE_VARIANT_ATTRIBUTE_TRIANGLES, - PIPELINE_VARIANT_ATTRIBUTE_TRIANGLE_STRIP, - PIPELINE_VARIANT_ATTRIBUTE_LINES, - PIPELINE_VARIANT_ATTRIBUTE_LINES_STRIP, - PIPELINE_VARIANT_ATTRIBUTE_POINTS, - PIPELINE_VARIANT_QUAD_LCD_BLEND, - PIPELINE_VARIANT_MAX - }; - enum PipelineLightMode { - PIPELINE_LIGHT_MODE_DISABLED, - PIPELINE_LIGHT_MODE_ENABLED, - PIPELINE_LIGHT_MODE_MAX - }; + struct ShaderSpecialization { + union { + struct { + uint32_t use_lighting : 1; + uint32_t use_point_size : 1; + }; - struct PipelineVariants { - PipelineCacheRD variants[PIPELINE_LIGHT_MODE_MAX][PIPELINE_VARIANT_MAX]; + uint32_t packed_0; + }; }; - struct { - CanvasShaderRD canvas_shader; - RID default_version; - RID default_version_rd_shader; - RID quad_index_buffer; - RID quad_index_array; - PipelineVariants pipeline_variants; - - ShaderCompiler compiler; - } shader; + struct PipelineKey { + ShaderVariant variant = SHADER_VARIANT_MAX; + RD::FramebufferFormatID framebuffer_format_id = RD::INVALID_FORMAT_ID; + RD::VertexFormatID vertex_format_id = RD::INVALID_ID; + RD::RenderPrimitive render_primitive = RD::RENDER_PRIMITIVE_MAX; + ShaderSpecialization shader_specialization = {}; + uint32_t lcd_blend = 0; + uint32_t ubershader = 0; + + uint32_t hash() const { + uint32_t h = hash_murmur3_one_32(variant); + h = hash_murmur3_one_32(framebuffer_format_id, h); + h = hash_murmur3_one_32(vertex_format_id, h); + h = hash_murmur3_one_32(render_primitive, h); + h = hash_murmur3_one_32(shader_specialization.packed_0, h); + h = hash_murmur3_one_32(lcd_blend, h); + h = 
hash_murmur3_one_32(ubershader, h); + return hash_fmix32(h); + } + }; struct CanvasShaderData : public RendererRD::MaterialStorage::ShaderData { - enum BlendMode { //used internally - BLEND_MODE_MIX, - BLEND_MODE_ADD, - BLEND_MODE_SUB, - BLEND_MODE_MUL, - BLEND_MODE_PMALPHA, - BLEND_MODE_DISABLED, - }; - - bool valid = false; - RID version; - PipelineVariants pipeline_variants; - Vector texture_uniforms; + int blend_mode = 0; Vector ubo_offsets; uint32_t ubo_size = 0; String code; + RID version; + PipelineHashMapRD pipeline_hash_map; + + static const uint32_t VERTEX_INPUT_MASKS_SIZE = SHADER_VARIANT_MAX * 2; + std::atomic vertex_input_masks[VERTEX_INPUT_MASKS_SIZE] = {}; bool uses_screen_texture = false; bool uses_screen_texture_mipmaps = false; bool uses_sdf = false; bool uses_time = false; + void _clear_vertex_input_mask_cache(); + void _create_pipeline(PipelineKey p_pipeline_key); virtual void set_code(const String &p_Code); virtual bool is_animated() const; virtual bool casts_shadows() const; virtual RS::ShaderNativeSourceCode get_native_source_code() const; + RID get_shader(ShaderVariant p_shader_variant, bool p_ubershader) const; + uint64_t get_vertex_input_mask(ShaderVariant p_shader_variant, bool p_ubershader); + bool is_valid() const; - CanvasShaderData() {} + CanvasShaderData(); virtual ~CanvasShaderData(); }; + struct { + CanvasShaderRD canvas_shader; + RID default_version_rd_shader; + CanvasShaderData default_version_data; + RID quad_index_buffer; + RID quad_index_array; + ShaderCompiler compiler; + uint32_t pipeline_compilations[RS::PIPELINE_SOURCE_MAX] = {}; + Mutex mutex; + } shader; + RendererRD::MaterialStorage::ShaderData *_create_shader_func(); static RendererRD::MaterialStorage::ShaderData *_create_shader_funcs() { return static_cast(singleton)->_create_shader_func(); @@ -365,7 +364,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender { struct PushConstant { uint32_t base_instance_index; - uint32_t pad1; + ShaderSpecialization 
shader_specialization; uint32_t pad2; uint32_t pad3; }; @@ -448,11 +447,12 @@ class RendererCanvasRenderRD : public RendererCanvasRender { RID material; CanvasMaterialData *material_data = nullptr; - PipelineLightMode light_mode = PipelineLightMode::PIPELINE_LIGHT_MODE_DISABLED; - PipelineVariant pipeline_variant = PipelineVariant::PIPELINE_VARIANT_QUAD; const Item::Command *command = nullptr; Item::Command::Type command_type = Item::Command::TYPE_ANIMATION_SLICE; // Can default to any type that doesn't form a batch. + ShaderVariant shader_variant = SHADER_VARIANT_QUAD; + RD::RenderPrimitive render_primitive = RD::RENDER_PRIMITIVE_TRIANGLES; + bool use_lighting = false; // batch-specific data union { @@ -558,9 +558,10 @@ class RendererCanvasRenderRD : public RendererCanvasRender { uint32_t base_flags = 0; }; + inline RID _get_pipeline_specialization_or_ubershader(CanvasShaderData *p_shader_data, PipelineKey &r_pipeline_key, PushConstant &r_push_constant, RID p_mesh_instance = RID(), void *p_surface = nullptr, uint32_t p_surface_index = 0, RID *r_vertex_array = nullptr); void _render_batch_items(RenderTarget p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool &r_sdf_used, bool p_to_backbuffer = false, RenderingMethod::RenderInfo *r_render_info = nullptr); void _record_item_commands(const Item *p_item, RenderTarget p_render_target, const Transform2D &p_base_transform, Item *&r_current_clip, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used, Batch *&r_current_batch); - void _render_batch(RD::DrawListID p_draw_list, PipelineVariants *p_pipeline_variants, RenderingDevice::FramebufferFormatID p_framebuffer_format, Light *p_lights, Batch const *p_batch, RenderingMethod::RenderInfo *r_render_info = nullptr); + void _render_batch(RD::DrawListID p_draw_list, CanvasShaderData *p_shader_data, RenderingDevice::FramebufferFormatID p_framebuffer_format, Light *p_lights, Batch const *p_batch, 
RenderingMethod::RenderInfo *r_render_info = nullptr); void _prepare_batch_texture(Batch *p_current_batch, RID p_texture) const; void _bind_canvas_texture(RD::DrawListID p_draw_list, RID p_uniform_set); [[nodiscard]] Batch *_new_batch(bool &r_batch_broken); @@ -596,6 +597,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender { virtual void set_shadow_texture_size(int p_size) override; void set_debug_redraw(bool p_enabled, double p_time, const Color &p_color) override; + uint32_t get_pipeline_compilations(RS::PipelineSource p_source) override; void set_time(double p_time); void update() override; diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp index 39c3e9b168a1..6234cddee393 100644 --- a/servers/rendering/renderer_rd/shader_rd.cpp +++ b/servers/rendering/renderer_rd/shader_rd.cpp @@ -39,6 +39,8 @@ #include "servers/rendering/rendering_device.h" #include "thirdparty/misc/smolv.h" +#define ENABLE_SHADER_CACHE 1 + void ShaderRD::_add_stage(const char *p_code, StageType p_stage_type) { Vector lines = String(p_code).split("\n"); @@ -144,7 +146,8 @@ RID ShaderRD::version_create() { version.dirty = true; version.valid = false; version.initialize_needed = true; - version.variants = nullptr; + version.variants.clear(); + version.variant_data.clear(); return version_owner.make_rid(version); } @@ -154,23 +157,25 @@ void ShaderRD::_initialize_version(Version *p_version) { p_version->valid = false; p_version->dirty = false; - p_version->variants = memnew_arr(RID, variant_defines.size()); + p_version->variants.resize_zeroed(variant_defines.size()); + p_version->variant_data.resize(variant_defines.size()); + p_version->group_compilation_tasks.resize(group_enabled.size()); + p_version->group_compilation_tasks.fill(0); } void ShaderRD::_clear_version(Version *p_version) { + _compile_ensure_finished(p_version); + // Clear versions if they exist. 
- if (p_version->variants) { + if (!p_version->variants.is_empty()) { for (int i = 0; i < variant_defines.size(); i++) { if (p_version->variants[i].is_valid()) { RD::get_singleton()->free(p_version->variants[i]); } } - memdelete_arr(p_version->variants); - if (p_version->variant_data) { - memdelete_arr(p_version->variant_data); - } - p_version->variants = nullptr; + p_version->variants.clear(); + p_version->variant_data.clear(); } } @@ -227,8 +232,8 @@ void ShaderRD::_build_variant_code(StringBuilder &builder, uint32_t p_variant, c } } -void ShaderRD::_compile_variant(uint32_t p_variant, const CompileData *p_data) { - uint32_t variant = group_to_variant_map[p_data->group][p_variant]; +void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) { + uint32_t variant = group_to_variant_map[p_data.group][p_variant]; if (!variants_enabled[variant]) { return; // Variant is disabled, return. @@ -245,7 +250,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, const CompileData *p_data) { //vertex stage StringBuilder builder; - _build_variant_code(builder, variant, p_data->version, stage_templates[STAGE_TYPE_VERTEX]); + _build_variant_code(builder, variant, p_data.version, stage_templates[STAGE_TYPE_VERTEX]); current_source = builder.as_string(); RD::ShaderStageSPIRVData stage; @@ -263,7 +268,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, const CompileData *p_data) { current_stage = RD::SHADER_STAGE_FRAGMENT; StringBuilder builder; - _build_variant_code(builder, variant, p_data->version, stage_templates[STAGE_TYPE_FRAGMENT]); + _build_variant_code(builder, variant, p_data.version, stage_templates[STAGE_TYPE_FRAGMENT]); current_source = builder.as_string(); RD::ShaderStageSPIRVData stage; @@ -281,7 +286,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, const CompileData *p_data) { current_stage = RD::SHADER_STAGE_COMPUTE; StringBuilder builder; - _build_variant_code(builder, variant, p_data->version, stage_templates[STAGE_TYPE_COMPUTE]); + 
_build_variant_code(builder, variant, p_data.version, stage_templates[STAGE_TYPE_COMPUTE]); current_source = builder.as_string(); @@ -313,8 +318,8 @@ void ShaderRD::_compile_variant(uint32_t p_variant, const CompileData *p_data) { { MutexLock lock(variant_set_mutex); - p_data->version->variants[variant] = RD::get_singleton()->shader_create_from_bytecode(shader_data, p_data->version->variants[variant]); - p_data->version->variant_data[variant] = shader_data; + p_data.version->variants.write[variant] = RD::get_singleton()->shader_create_from_bytecode(shader_data, p_data.version->variants[variant]); + p_data.version->variant_data.write[variant] = shader_data; } } @@ -443,14 +448,14 @@ bool ShaderRD::_load_from_cache(Version *p_version, int p_group) { ERR_FAIL_COND_V(br != variant_size, false); - p_version->variant_data[variant_id] = variant_bytes; + p_version->variant_data.write[variant_id] = variant_bytes; } for (uint32_t i = 0; i < variant_count; i++) { int variant_id = group_to_variant_map[p_group][i]; if (!variants_enabled[variant_id]) { MutexLock lock(variant_set_mutex); - p_version->variants[variant_id] = RID(); + p_version->variants.write[variant_id] = RID(); continue; } { @@ -464,12 +469,10 @@ bool ShaderRD::_load_from_cache(Version *p_version, int p_group) { ERR_FAIL_COND_V(shader.is_null(), false); } - p_version->variants[variant_id] = shader; + p_version->variants.write[variant_id] = shader; } } - memdelete_arr(p_version->variant_data); //clear stages - p_version->variant_data = nullptr; p_version->valid = true; return true; } @@ -491,48 +494,51 @@ void ShaderRD::_save_to_cache(Version *p_version, int p_group) { } void ShaderRD::_allocate_placeholders(Version *p_version, int p_group) { - ERR_FAIL_NULL(p_version->variants); + ERR_FAIL_COND(p_version->variants.is_empty()); + for (uint32_t i = 0; i < group_to_variant_map[p_group].size(); i++) { int variant_id = group_to_variant_map[p_group][i]; RID shader = RD::get_singleton()->shader_create_placeholder(); { 
MutexLock lock(variant_set_mutex); - p_version->variants[variant_id] = shader; + p_version->variants.write[variant_id] = shader; } } } // Try to compile all variants for a given group. // Will skip variants that are disabled. -void ShaderRD::_compile_version(Version *p_version, int p_group) { +void ShaderRD::_compile_version_start(Version *p_version, int p_group) { if (!group_enabled[p_group]) { return; } - typedef Vector ShaderStageData; - p_version->variant_data = memnew_arr(ShaderStageData, variant_defines.size()); - p_version->dirty = false; +#if ENABLE_SHADER_CACHE if (shader_cache_dir_valid) { if (_load_from_cache(p_version, p_group)) { return; } } +#endif CompileData compile_data; compile_data.version = p_version; compile_data.group = p_group; -#if 1 - WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &ShaderRD::_compile_variant, &compile_data, group_to_variant_map[p_group].size(), -1, true, SNAME("ShaderCompilation")); - WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); + WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &ShaderRD::_compile_variant, compile_data, group_to_variant_map[p_group].size(), -1, true, SNAME("ShaderCompilation")); + p_version->group_compilation_tasks.write[p_group] = group_task; +} -#else - for (uint32_t i = 0; i < group_to_variant_map[p_group].size(); i++) { - _compile_variant(i, &compile_data); +void ShaderRD::_compile_version_end(Version *p_version, int p_group) { + if (p_version->group_compilation_tasks.size() <= p_group || p_version->group_compilation_tasks[p_group] == 0) { + return; } -#endif + + WorkerThreadPool::GroupID group_task = p_version->group_compilation_tasks[p_group]; + WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); + p_version->group_compilation_tasks.write[p_group] = 0; bool all_valid = true; @@ -557,29 +563,35 @@ void 
ShaderRD::_compile_version(Version *p_version, int p_group) { RD::get_singleton()->free(p_version->variants[i]); } } - memdelete_arr(p_version->variants); - if (p_version->variant_data) { - memdelete_arr(p_version->variant_data); - } - p_version->variants = nullptr; - p_version->variant_data = nullptr; + + p_version->variants.clear(); + p_version->variant_data.clear(); return; - } else if (shader_cache_dir_valid) { - // Save shader cache. + } +#if ENABLE_SHADER_CACHE + else if (shader_cache_dir_valid) { _save_to_cache(p_version, p_group); } - - memdelete_arr(p_version->variant_data); //clear stages - p_version->variant_data = nullptr; +#endif p_version->valid = true; } +void ShaderRD::_compile_ensure_finished(Version *p_version) { + // Wait for compilation of existing groups if necessary. + for (int i = 0; i < group_enabled.size(); i++) { + _compile_version_end(p_version, i); + } +} + void ShaderRD::version_set_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_vertex_globals, const String &p_fragment_globals, const Vector &p_custom_defines) { ERR_FAIL_COND(is_compute); Version *version = version_owner.get_or_null(p_version); ERR_FAIL_NULL(version); + + _compile_ensure_finished(version); + version->vertex_globals = p_vertex_globals.utf8(); version->fragment_globals = p_fragment_globals.utf8(); version->uniforms = p_uniforms.utf8(); @@ -601,7 +613,7 @@ void ShaderRD::version_set_code(RID p_version, const HashMap &p_ _allocate_placeholders(version, i); continue; } - _compile_version(version, i); + _compile_version_start(version, i); } version->initialize_needed = false; } @@ -613,6 +625,8 @@ void ShaderRD::version_set_compute_code(RID p_version, const HashMapcompute_globals = p_compute_globals.utf8(); version->uniforms = p_uniforms.utf8(); @@ -634,7 +648,7 @@ void ShaderRD::version_set_compute_code(RID p_version, const HashMapinitialize_needed = false; } @@ -651,10 +665,12 @@ bool ShaderRD::version_is_valid(RID p_version) { 
_allocate_placeholders(version, i); continue; } - _compile_version(version, i); + _compile_version_start(version, i); } } + _compile_ensure_finished(version); + return version->valid; } @@ -696,7 +712,7 @@ void ShaderRD::enable_group(int p_group) { version_owner.get_owned_list(&all_versions); for (const RID &E : all_versions) { Version *version = version_owner.get_or_null(E); - _compile_version(version, p_group); + _compile_version_start(version, p_group); } } @@ -735,6 +751,7 @@ void ShaderRD::initialize(const Vector &p_variant_defines, const String for (int i = 0; i < p_variant_defines.size(); i++) { variant_defines.push_back(VariantDefine(0, p_variant_defines[i], true)); variants_enabled.push_back(true); + variant_to_group.push_back(0); group_to_variant_map[0].push_back(i); } @@ -796,6 +813,7 @@ void ShaderRD::initialize(const Vector &p_variant_defines, const // Fill variant array. variant_defines.push_back(p_variant_defines[i]); variants_enabled.push_back(true); + variant_to_group.push_back(p_variant_defines[i].group); // Map variant array index to group id, so we can iterate over groups later. if (!group_to_variant_map.has(p_variant_defines[i].group)) { diff --git a/servers/rendering/renderer_rd/shader_rd.h b/servers/rendering/renderer_rd/shader_rd.h index 688092d604fb..90e41947b937 100644 --- a/servers/rendering/renderer_rd/shader_rd.h +++ b/servers/rendering/renderer_rd/shader_rd.h @@ -59,6 +59,7 @@ class ShaderRD { CharString general_defines; Vector variant_defines; Vector variants_enabled; + Vector variant_to_group; HashMap> group_to_variant_map; Vector group_enabled; @@ -69,9 +70,10 @@ class ShaderRD { CharString fragment_globals; HashMap code_sections; Vector custom_defines; + Vector group_compilation_tasks; - Vector *variant_data = nullptr; - RID *variants = nullptr; // Same size as variant defines. 
+ Vector> variant_data; + Vector variants; bool valid; bool dirty; @@ -85,11 +87,13 @@ class ShaderRD { int group = 0; }; - void _compile_variant(uint32_t p_variant, const CompileData *p_data); + void _compile_variant(uint32_t p_variant, CompileData p_data); void _initialize_version(Version *p_version); void _clear_version(Version *p_version); - void _compile_version(Version *p_version, int p_group); + void _compile_version_start(Version *p_version, int p_group); + void _compile_version_end(Version *p_version, int p_group); + void _compile_ensure_finished(Version *p_version); void _allocate_placeholders(Version *p_version, int p_group); RID_Owner version_owner; @@ -172,10 +176,15 @@ class ShaderRD { _allocate_placeholders(version, i); continue; } - _compile_version(version, i); + _compile_version_start(version, i); } } + uint32_t group = variant_to_group[p_variant]; + if (version->group_compilation_tasks[group] != 0) { + _compile_version_end(version, group); + } + if (!version->valid) { return RID(); } diff --git a/servers/rendering/renderer_rd/shaders/canvas.glsl b/servers/rendering/renderer_rd/shaders/canvas.glsl index 704aafdfa5dc..622566eba086 100644 --- a/servers/rendering/renderer_rd/shaders/canvas.glsl +++ b/servers/rendering/renderer_rd/shaders/canvas.glsl @@ -193,9 +193,7 @@ void main() { } #endif // USE_ATTRIBUTES -#ifdef USE_POINT_SIZE float point_size = 1.0; -#endif #ifdef USE_WORLD_VERTEX_COORDS vertex = (model_matrix * vec4(vertex, 0.0, 1.0)).xy; @@ -228,9 +226,9 @@ void main() { gl_Position = canvas_data.screen_transform * vec4(vertex, 0.0, 1.0); -#ifdef USE_POINT_SIZE - gl_PointSize = point_size; -#endif + if (sc_use_point_size()) { + gl_PointSize = point_size; + } } #[fragment] @@ -368,8 +366,6 @@ float map_ninepatch_axis(float pixel, float draw_size, float tex_pixel_size, flo #endif -#ifdef USE_LIGHTING - vec3 light_normal_compute(vec3 light_vec, vec3 normal, vec3 base_color, vec3 light_color, vec4 specular_shininess, bool specular_shininess_used) 
{ float cNdotL = max(0.0, dot(normal, light_vec)); @@ -459,8 +455,6 @@ void light_blend_compute(uint light_base, vec4 light_color, inout vec3 color) { } } -#endif - float msdf_median(float r, float g, float b, float a) { return min(max(min(r, g), min(max(r, g), b)), a); } @@ -533,7 +527,7 @@ void main() { color *= texture(sampler2D(color_texture, texture_sampler), uv); } - uint light_count = bitfieldExtract(draw_data.flags, FLAGS_LIGHT_COUNT_SHIFT, 4); //max 16 lights + uint light_count = bitfieldExtract(draw_data.flags, FLAGS_LIGHT_COUNT_SHIFT, 4); //max 15 lights bool using_light = (light_count + canvas_data.directional_light_count) > 0; vec3 normal; @@ -617,134 +611,135 @@ void main() { color *= canvas_data.canvas_modulation; #endif -#if defined(USE_LIGHTING) && !defined(MODE_UNSHADED) +#if !defined(MODE_UNSHADED) + if (sc_use_lighting()) { + // Directional Lights - // Directional Lights + for (uint i = 0; i < canvas_data.directional_light_count; i++) { + uint light_base = i; - for (uint i = 0; i < canvas_data.directional_light_count; i++) { - uint light_base = i; - - vec2 direction = light_array.data[light_base].position; - vec4 light_color = light_array.data[light_base].color; + vec2 direction = light_array.data[light_base].position; + vec4 light_color = light_array.data[light_base].color; #ifdef LIGHT_CODE_USED - vec4 shadow_modulate = vec4(1.0); - light_color = light_compute(light_vertex, vec3(direction, light_array.data[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, true); + vec4 shadow_modulate = vec4(1.0); + light_color = light_compute(light_vertex, vec3(direction, light_array.data[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, true); #else - if (normal_used) { - vec3 light_vec = normalize(mix(vec3(direction, 0.0), vec3(0, 0, 1), light_array.data[light_base].height)); - light_color.rgb = 
light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used); - } else { - light_color.rgb *= base_color.rgb; - } + if (normal_used) { + vec3 light_vec = normalize(mix(vec3(direction, 0.0), vec3(0, 0, 1), light_array.data[light_base].height)); + light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used); + } else { + light_color.rgb *= base_color.rgb; + } #endif - if (bool(light_array.data[light_base].flags & LIGHT_FLAGS_HAS_SHADOW)) { - vec2 shadow_pos = (vec4(shadow_vertex, 0.0, 1.0) * mat4(light_array.data[light_base].shadow_matrix[0], light_array.data[light_base].shadow_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations. + if (bool(light_array.data[light_base].flags & LIGHT_FLAGS_HAS_SHADOW)) { + vec2 shadow_pos = (vec4(shadow_vertex, 0.0, 1.0) * mat4(light_array.data[light_base].shadow_matrix[0], light_array.data[light_base].shadow_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations. 
- vec4 shadow_uv = vec4(shadow_pos.x, light_array.data[light_base].shadow_y_ofs, shadow_pos.y * light_array.data[light_base].shadow_zfar_inv, 1.0); + vec4 shadow_uv = vec4(shadow_pos.x, light_array.data[light_base].shadow_y_ofs, shadow_pos.y * light_array.data[light_base].shadow_zfar_inv, 1.0); - light_color = light_shadow_compute(light_base, light_color, shadow_uv + light_color = light_shadow_compute(light_base, light_color, shadow_uv #ifdef LIGHT_CODE_USED - , - shadow_modulate.rgb + , + shadow_modulate.rgb #endif - ); - } + ); + } - light_blend_compute(light_base, light_color, color.rgb); + light_blend_compute(light_base, light_color, color.rgb); #ifdef MODE_LIGHT_ONLY - light_only_alpha += light_color.a; + light_only_alpha += light_color.a; #endif - } + } - // Positional Lights + // Positional Lights - for (uint i = 0; i < MAX_LIGHTS_PER_ITEM; i++) { - if (i >= light_count) { - break; - } - uint light_base = bitfieldExtract(draw_data.lights[i >> 2], (int(i) & 0x3) * 8, 8); + for (uint i = 0; i < MAX_LIGHTS_PER_ITEM; i++) { + if (i >= light_count) { + break; + } + uint light_base = bitfieldExtract(draw_data.lights[i >> 2], (int(i) & 0x3) * 8, 8); - vec2 tex_uv = (vec4(vertex, 0.0, 1.0) * mat4(light_array.data[light_base].texture_matrix[0], light_array.data[light_base].texture_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations. - vec2 tex_uv_atlas = tex_uv * light_array.data[light_base].atlas_rect.zw + light_array.data[light_base].atlas_rect.xy; + vec2 tex_uv = (vec4(vertex, 0.0, 1.0) * mat4(light_array.data[light_base].texture_matrix[0], light_array.data[light_base].texture_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations. 
+ vec2 tex_uv_atlas = tex_uv * light_array.data[light_base].atlas_rect.zw + light_array.data[light_base].atlas_rect.xy; - if (any(lessThan(tex_uv, vec2(0.0, 0.0))) || any(greaterThanEqual(tex_uv, vec2(1.0, 1.0)))) { - //if outside the light texture, light color is zero - continue; - } + if (any(lessThan(tex_uv, vec2(0.0, 0.0))) || any(greaterThanEqual(tex_uv, vec2(1.0, 1.0)))) { + //if outside the light texture, light color is zero + continue; + } - vec4 light_color = textureLod(sampler2D(atlas_texture, texture_sampler), tex_uv_atlas, 0.0); - vec4 light_base_color = light_array.data[light_base].color; + vec4 light_color = textureLod(sampler2D(atlas_texture, texture_sampler), tex_uv_atlas, 0.0); + vec4 light_base_color = light_array.data[light_base].color; #ifdef LIGHT_CODE_USED - vec4 shadow_modulate = vec4(1.0); - vec3 light_position = vec3(light_array.data[light_base].position, light_array.data[light_base].height); + vec4 shadow_modulate = vec4(1.0); + vec3 light_position = vec3(light_array.data[light_base].position, light_array.data[light_base].height); - light_color.rgb *= light_base_color.rgb; - light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, false); + light_color.rgb *= light_base_color.rgb; + light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, false); #else - light_color.rgb *= light_base_color.rgb * light_base_color.a; + light_color.rgb *= light_base_color.rgb * light_base_color.a; - if (normal_used) { - vec3 light_pos = vec3(light_array.data[light_base].position, light_array.data[light_base].height); - vec3 pos = light_vertex; - vec3 light_vec = normalize(light_pos - pos); + if (normal_used) { + vec3 light_pos = vec3(light_array.data[light_base].position, light_array.data[light_base].height); + vec3 pos = light_vertex; + vec3 
light_vec = normalize(light_pos - pos); - light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used); - } else { - light_color.rgb *= base_color.rgb; - } + light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used); + } else { + light_color.rgb *= base_color.rgb; + } #endif - if (bool(light_array.data[light_base].flags & LIGHT_FLAGS_HAS_SHADOW)) { - vec2 shadow_pos = (vec4(shadow_vertex, 0.0, 1.0) * mat4(light_array.data[light_base].shadow_matrix[0], light_array.data[light_base].shadow_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations. - - vec2 pos_norm = normalize(shadow_pos); - vec2 pos_abs = abs(pos_norm); - vec2 pos_box = pos_norm / max(pos_abs.x, pos_abs.y); - vec2 pos_rot = pos_norm * mat2(vec2(0.7071067811865476, -0.7071067811865476), vec2(0.7071067811865476, 0.7071067811865476)); //is there a faster way to 45 degrees rot? - float tex_ofs; - float distance; - if (pos_rot.y > 0) { - if (pos_rot.x > 0) { - tex_ofs = pos_box.y * 0.125 + 0.125; - distance = shadow_pos.x; + if (bool(light_array.data[light_base].flags & LIGHT_FLAGS_HAS_SHADOW)) { + vec2 shadow_pos = (vec4(shadow_vertex, 0.0, 1.0) * mat4(light_array.data[light_base].shadow_matrix[0], light_array.data[light_base].shadow_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations. + + vec2 pos_norm = normalize(shadow_pos); + vec2 pos_abs = abs(pos_norm); + vec2 pos_box = pos_norm / max(pos_abs.x, pos_abs.y); + vec2 pos_rot = pos_norm * mat2(vec2(0.7071067811865476, -0.7071067811865476), vec2(0.7071067811865476, 0.7071067811865476)); //is there a faster way to 45 degrees rot? 
+ float tex_ofs; + float distance; + if (pos_rot.y > 0) { + if (pos_rot.x > 0) { + tex_ofs = pos_box.y * 0.125 + 0.125; + distance = shadow_pos.x; + } else { + tex_ofs = pos_box.x * -0.125 + (0.25 + 0.125); + distance = shadow_pos.y; + } } else { - tex_ofs = pos_box.x * -0.125 + (0.25 + 0.125); - distance = shadow_pos.y; + if (pos_rot.x < 0) { + tex_ofs = pos_box.y * -0.125 + (0.5 + 0.125); + distance = -shadow_pos.x; + } else { + tex_ofs = pos_box.x * 0.125 + (0.75 + 0.125); + distance = -shadow_pos.y; + } } - } else { - if (pos_rot.x < 0) { - tex_ofs = pos_box.y * -0.125 + (0.5 + 0.125); - distance = -shadow_pos.x; - } else { - tex_ofs = pos_box.x * 0.125 + (0.75 + 0.125); - distance = -shadow_pos.y; - } - } - distance *= light_array.data[light_base].shadow_zfar_inv; + distance *= light_array.data[light_base].shadow_zfar_inv; - //float distance = length(shadow_pos); - vec4 shadow_uv = vec4(tex_ofs, light_array.data[light_base].shadow_y_ofs, distance, 1.0); + //float distance = length(shadow_pos); + vec4 shadow_uv = vec4(tex_ofs, light_array.data[light_base].shadow_y_ofs, distance, 1.0); - light_color = light_shadow_compute(light_base, light_color, shadow_uv + light_color = light_shadow_compute(light_base, light_color, shadow_uv #ifdef LIGHT_CODE_USED - , - shadow_modulate.rgb + , + shadow_modulate.rgb #endif - ); - } + ); + } - light_blend_compute(light_base, light_color, color.rgb); + light_blend_compute(light_base, light_color, color.rgb); #ifdef MODE_LIGHT_ONLY - light_only_alpha += light_color.a; + light_only_alpha += light_color.a; #endif + } } #endif diff --git a/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl b/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl index 7cf5b4576e6a..fc26a1c89c39 100644 --- a/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl @@ -23,14 +23,14 @@ #define FLAGS_LIGHT_COUNT_SHIFT 20 -#define FLAGS_DEFAULT_NORMAL_MAP_USED 
(1 << 26) -#define FLAGS_DEFAULT_SPECULAR_MAP_USED (1 << 27) +#define FLAGS_DEFAULT_NORMAL_MAP_USED (1 << 24) +#define FLAGS_DEFAULT_SPECULAR_MAP_USED (1 << 25) -#define FLAGS_USE_MSDF (1 << 28) -#define FLAGS_USE_LCD (1 << 29) +#define FLAGS_USE_MSDF (1 << 26) +#define FLAGS_USE_LCD (1 << 27) -#define FLAGS_FLIP_H (1 << 30) -#define FLAGS_FLIP_V (1 << 31) +#define FLAGS_FLIP_H (1 << 28) +#define FLAGS_FLIP_V (1 << 29) struct InstanceData { vec2 world_x; @@ -61,12 +61,40 @@ instances; layout(push_constant, std430) uniform Params { uint base_instance_index; // base index to instance data - uint pad1; + uint sc_packed_0; uint pad2; uint pad3; } params; +// Specialization constants. + +#ifdef UBERSHADER + +// Pull the constants from the push constants (the `params` block). +uint sc_packed_0() { + return params.sc_packed_0; +} + +#else + +// Pull the constants from the pipeline's specialization constants. +layout(constant_id = 0) const uint pso_sc_packed_0 = 0; + +uint sc_packed_0() { + return pso_sc_packed_0; +} + +#endif + +bool sc_use_lighting() { + return ((sc_packed_0() >> 0) & 1U) != 0; +} + +bool sc_use_point_size() { + return ((sc_packed_0() >> 1) & 1U) != 0; +} + // In vulkan, sets should always be ordered using the following logic: // Lower Sets: Sets that change format and layout less often // Higher sets: Sets that change format and layout very often diff --git a/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl b/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl index 400451ec36b0..3490bf8f7d8e 100644 --- a/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl +++ b/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl @@ -643,29 +643,6 @@ void main() { #define SHADER_IS_SRGB false #define SHADER_SPACE_FAR 0.0 -/* Specialization Constants (Toggles) */ - -layout(constant_id = 0) const bool sc_use_forward_gi = false; 
-layout(constant_id = 1) const bool sc_use_light_projector = false; -layout(constant_id = 2) const bool sc_use_light_soft_shadows = false; -layout(constant_id = 3) const bool sc_use_directional_soft_shadows = false; - -/* Specialization Constants (Values) */ - -layout(constant_id = 6) const uint sc_soft_shadow_samples = 4; -layout(constant_id = 7) const uint sc_penumbra_shadow_samples = 4; - -layout(constant_id = 8) const uint sc_directional_soft_shadow_samples = 4; -layout(constant_id = 9) const uint sc_directional_penumbra_shadow_samples = 4; - -layout(constant_id = 10) const bool sc_decal_use_mipmaps = true; -layout(constant_id = 11) const bool sc_projector_use_mipmaps = true; -layout(constant_id = 12) const bool sc_use_depth_fog = false; -layout(constant_id = 13) const bool sc_use_lightmap_bicubic_filter = false; - -// not used in clustered renderer but we share some code with the mobile renderer that requires this. -const float sc_luminance_multiplier = 1.0; - #include "scene_forward_clustered_inc.glsl" /* Varyings */ @@ -917,7 +894,7 @@ vec4 fog_process(vec3 vertex) { float fog_amount = 0.0; - if (sc_use_depth_fog) { + if (sc_use_depth_fog()) { float fog_z = smoothstep(scene_data_block.data.fog_depth_begin, scene_data_block.data.fog_depth_end, length(vertex)); float fog_quad_amount = pow(fog_z, scene_data_block.data.fog_depth_curve) * scene_data_block.data.fog_density; fog_amount = fog_quad_amount; @@ -1305,7 +1282,7 @@ void fragment_shader(in SceneData scene_data) { if (decals.data[decal_index].albedo_rect != vec4(0.0)) { //has albedo vec4 decal_albedo; - if (sc_decal_use_mipmaps) { + if (sc_decal_use_mipmaps()) { decal_albedo = textureGrad(sampler2D(decal_atlas_srgb, decal_sampler), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, ddx * decals.data[decal_index].albedo_rect.zw, ddy * decals.data[decal_index].albedo_rect.zw); } else { decal_albedo = textureLod(sampler2D(decal_atlas_srgb, decal_sampler), 
uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, 0.0); @@ -1316,7 +1293,7 @@ void fragment_shader(in SceneData scene_data) { if (decals.data[decal_index].normal_rect != vec4(0.0)) { vec3 decal_normal; - if (sc_decal_use_mipmaps) { + if (sc_decal_use_mipmaps()) { decal_normal = textureGrad(sampler2D(decal_atlas, decal_sampler), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, ddx * decals.data[decal_index].normal_rect.zw, ddy * decals.data[decal_index].normal_rect.zw).xyz; } else { decal_normal = textureLod(sampler2D(decal_atlas, decal_sampler), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, 0.0).xyz; @@ -1331,7 +1308,7 @@ void fragment_shader(in SceneData scene_data) { if (decals.data[decal_index].orm_rect != vec4(0.0)) { vec3 decal_orm; - if (sc_decal_use_mipmaps) { + if (sc_decal_use_mipmaps()) { decal_orm = textureGrad(sampler2D(decal_atlas, decal_sampler), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, ddx * decals.data[decal_index].orm_rect.zw, ddy * decals.data[decal_index].orm_rect.zw).xyz; } else { decal_orm = textureLod(sampler2D(decal_atlas, decal_sampler), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, 0.0).xyz; @@ -1344,7 +1321,7 @@ void fragment_shader(in SceneData scene_data) { if (decals.data[decal_index].emission_rect != vec4(0.0)) { //emission is additive, so its independent from albedo - if (sc_decal_use_mipmaps) { + if (sc_decal_use_mipmaps()) { emission += textureGrad(sampler2D(decal_atlas_srgb, decal_sampler), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].modulate.rgb * decals.data[decal_index].emission_energy * fade; } else { 
emission += textureLod(sampler2D(decal_atlas_srgb, decal_sampler), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, 0.0).xyz * decals.data[decal_index].modulate.rgb * decals.data[decal_index].emission_energy * fade; @@ -1520,7 +1497,7 @@ void fragment_shader(in SceneData scene_data) { vec3 lm_light_l1_0; vec3 lm_light_l1p1; - if (sc_use_lightmap_bicubic_filter) { + if (sc_use_lightmap_bicubic_filter()) { lm_light_l0 = textureArray_bicubic(lightmap_textures[ofs], uvw + vec3(0.0, 0.0, 0.0), lightmaps.data[ofs].light_texture_size).rgb; lm_light_l1n1 = (textureArray_bicubic(lightmap_textures[ofs], uvw + vec3(0.0, 0.0, 1.0), lightmaps.data[ofs].light_texture_size).rgb - vec3(0.5)) * 2.0; lm_light_l1_0 = (textureArray_bicubic(lightmap_textures[ofs], uvw + vec3(0.0, 0.0, 2.0), lightmaps.data[ofs].light_texture_size).rgb - vec3(0.5)) * 2.0; @@ -1541,7 +1518,7 @@ void fragment_shader(in SceneData scene_data) { ambient_light += lm_light_l1p1 * n.x * (lm_light_l0 * en * 4.0); } else { - if (sc_use_lightmap_bicubic_filter) { + if (sc_use_lightmap_bicubic_filter()) { ambient_light += textureArray_bicubic(lightmap_textures[ofs], uvw, lightmaps.data[ofs].light_texture_size).rgb * lightmaps.data[ofs].exposure_normalization; } else { ambient_light += textureLod(sampler2DArray(lightmap_textures[ofs], SAMPLER_LINEAR_CLAMP), uvw, 0.0).rgb * lightmaps.data[ofs].exposure_normalization; @@ -1550,7 +1527,7 @@ void fragment_shader(in SceneData scene_data) { } #else - if (sc_use_forward_gi && bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture + if (sc_use_forward_gi() && bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture //make vertex orientation the world one, but still align to camera vec3 cam_pos = mat3(scene_data.inv_view_matrix) * vertex; @@ -1622,7 +1599,7 @@ void fragment_shader(in SceneData scene_data) { } } - if (sc_use_forward_gi && 
bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_VOXEL_GI)) { // process voxel_gi_instances + if (sc_use_forward_gi() && bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_VOXEL_GI)) { // process voxel_gi_instances uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; // Make vertex orientation the world one, but still align to camera. vec3 cam_pos = mat3(scene_data.inv_view_matrix) * vertex; @@ -1657,7 +1634,7 @@ void fragment_shader(in SceneData scene_data) { ambient_light = amb_accum.rgb; } - if (!sc_use_forward_gi && bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers + if (!sc_use_forward_gi() && bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers vec2 coord; @@ -1870,7 +1847,7 @@ void fragment_shader(in SceneData scene_data) { m_var.xyz += normal_bias; //version with soft shadows, more expensive - if (sc_use_directional_soft_shadows && directional_lights.data[i].softshadow_angle > 0) { + if (sc_use_directional_soft_shadows() && directional_lights.data[i].softshadow_angle > 0) { uint blend_count = 0; const uint blend_max = directional_lights.data[i].blend_splits ? 2 : 1; @@ -2144,7 +2121,7 @@ void fragment_shader(in SceneData scene_data) { shadow = 1.0; #endif - float size_A = sc_use_directional_soft_shadows ? directional_lights.data[i].size : 0.0; + float size_A = sc_use_directional_soft_shadows() ? 
directional_lights.data[i].size : 0.0; light_compute(normal, directional_lights.data[i].direction, normalize(view), size_A, #ifndef DEBUG_DRAW_PSSM_SPLITS @@ -2552,6 +2529,14 @@ void fragment_shader(in SceneData scene_data) { } void main() { +#ifdef UBERSHADER + bool front_facing = gl_FrontFacing; + if (uc_cull_mode() == POLYGON_CULL_BACK && !front_facing) { + discard; + } else if (uc_cull_mode() == POLYGON_CULL_FRONT && front_facing) { + discard; + } +#endif #ifdef MODE_DUAL_PARABOLOID if (dp_clip > 0.0) diff --git a/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered_inc.glsl b/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered_inc.glsl index 03511aa3a8c8..9f68d59be2b9 100644 --- a/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered_inc.glsl @@ -37,9 +37,96 @@ layout(push_constant, std430) uniform DrawCall { uint uv_offset; uint multimesh_motion_vectors_current_offset; uint multimesh_motion_vectors_previous_offset; +#ifdef UBERSHADER + uint sc_packed_0; + uint sc_packed_1; + uint sc_packed_2; + uint uc_packed_0; +#endif } draw_call; +/* Specialization Constants */ + +#ifdef UBERSHADER + +#define POLYGON_CULL_DISABLED 0 +#define POLYGON_CULL_FRONT 1 +#define POLYGON_CULL_BACK 2 + +// Pull the constants from the draw call's push constants. +uint sc_packed_0() { + return draw_call.sc_packed_0; +} + +uint uc_cull_mode() { + return (draw_call.uc_packed_0 >> 0) & 3U; +} + +#else + +// Pull the constants from the pipeline's specialization constants. 
+layout(constant_id = 0) const uint pso_sc_packed_0 = 0; + +uint sc_packed_0() { + return pso_sc_packed_0; +} + +#endif + +bool sc_use_forward_gi() { + return ((sc_packed_0() >> 0) & 1U) != 0; +} + +bool sc_use_light_projector() { + return ((sc_packed_0() >> 1) & 1U) != 0; +} + +bool sc_use_light_soft_shadows() { + return ((sc_packed_0() >> 2) & 1U) != 0; +} + +bool sc_use_directional_soft_shadows() { + return ((sc_packed_0() >> 3) & 1U) != 0; +} + +bool sc_decal_use_mipmaps() { + return ((sc_packed_0() >> 4) & 1U) != 0; +} + +bool sc_projector_use_mipmaps() { + return ((sc_packed_0() >> 5) & 1U) != 0; +} + +bool sc_use_depth_fog() { + return ((sc_packed_0() >> 6) & 1U) != 0; +} + +bool sc_use_lightmap_bicubic_filter() { + return ((sc_packed_0() >> 7) & 1U) != 0; +} + +uint sc_soft_shadow_samples() { + return (sc_packed_0() >> 8) & 15U; +} + +uint sc_penumbra_shadow_samples() { + return (sc_packed_0() >> 12) & 15U; +} + +uint sc_directional_soft_shadow_samples() { + return (sc_packed_0() >> 16) & 15U; +} + +uint sc_directional_penumbra_shadow_samples() { + return (sc_packed_0() >> 20) & 15U; +} + +float sc_luminance_multiplier() { + // Not used in clustered renderer but we share some code with the mobile renderer that requires this. 
+ return 1.0; +} + #define SDFGI_MAX_CASCADES 8 /* Set 0: Base Pass (never changes) */ diff --git a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl index 17c7b756c3ff..cac9d925526f 100644 --- a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl +++ b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl @@ -77,10 +77,6 @@ void axis_angle_to_tbn(vec3 axis, float angle, out vec3 tangent, out vec3 binorm normal = omc_axis.zzz * axis + vec3(-s_axis.y, s_axis.x, c); } -/* Spec Constants */ - -layout(constant_id = 17) const bool sc_is_multimesh = false; - /* Varyings */ layout(location = 0) highp out vec3 vertex_interp; @@ -206,7 +202,7 @@ void main() { mat4 matrix; mat4 read_model_matrix = model_matrix; - if (sc_is_multimesh) { + if (sc_is_multimesh()) { //multimesh, instances are for it #ifdef USE_PARTICLE_TRAILS @@ -402,7 +398,7 @@ void main() { // Then we combine the translations from the model matrix and the view matrix using emulated doubles. // We add the result to the vertex and ignore the final lost precision. 
vec3 model_origin = model_matrix[3].xyz; - if (sc_is_multimesh) { + if (sc_is_multimesh()) { vertex = mat3(matrix) * vertex; model_origin = double_add_vec3(model_origin, model_precision, matrix[3].xyz, vec3(0.0), model_precision); } @@ -501,41 +497,6 @@ void main() { #define SHADER_IS_SRGB false #define SHADER_SPACE_FAR 0.0 -/* Specialization Constants */ - -#if !defined(MODE_RENDER_DEPTH) - -#if !defined(MODE_UNSHADED) - -layout(constant_id = 0) const bool sc_use_light_projector = false; -layout(constant_id = 1) const bool sc_use_light_soft_shadows = false; -layout(constant_id = 2) const bool sc_use_directional_soft_shadows = false; - -layout(constant_id = 3) const uint sc_soft_shadow_samples = 4; -layout(constant_id = 4) const uint sc_penumbra_shadow_samples = 4; - -layout(constant_id = 5) const uint sc_directional_soft_shadow_samples = 4; -layout(constant_id = 6) const uint sc_directional_penumbra_shadow_samples = 4; - -layout(constant_id = 8) const bool sc_projector_use_mipmaps = true; - -layout(constant_id = 9) const bool sc_disable_omni_lights = false; -layout(constant_id = 10) const bool sc_disable_spot_lights = false; -layout(constant_id = 11) const bool sc_disable_reflection_probes = false; -layout(constant_id = 12) const bool sc_disable_directional_lights = false; -layout(constant_id = 18) const bool sc_use_lightmap_bicubic_filter = false; - -#endif //!MODE_UNSHADED - -layout(constant_id = 7) const bool sc_decal_use_mipmaps = true; -layout(constant_id = 13) const bool sc_disable_decals = false; -layout(constant_id = 14) const bool sc_disable_fog = false; -layout(constant_id = 16) const bool sc_use_depth_fog = false; - -#endif //!MODE_RENDER_DEPTH - -layout(constant_id = 15) const float sc_luminance_multiplier = 2.0; - /* Include our forward mobile UBOs definitions etc. 
*/ #include "scene_forward_mobile_inc.glsl" @@ -759,7 +720,7 @@ vec4 fog_process(vec3 vertex) { float fog_amount = 0.0; - if (sc_use_depth_fog) { + if (sc_use_depth_fog()) { float fog_z = smoothstep(scene_data_block.data.fog_depth_begin, scene_data_block.data.fog_depth_end, length(vertex)); float fog_quad_amount = pow(fog_z, scene_data_block.data.fog_depth_curve) * scene_data_block.data.fog_density; fog_amount = fog_quad_amount; @@ -785,6 +746,14 @@ vec4 fog_process(vec3 vertex) { #define scene_data scene_data_block.data void main() { +#ifdef UBERSHADER + bool front_facing = gl_FrontFacing; + if (uc_cull_mode() == POLYGON_CULL_BACK && !front_facing) { + discard; + } else if (uc_cull_mode() == POLYGON_CULL_FRONT && front_facing) { + discard; + } +#endif #ifdef MODE_DUAL_PARABOLOID if (dp_clip > 0.0) @@ -1010,7 +979,7 @@ void main() { // to maximize VGPR usage // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. - if (!sc_disable_fog && scene_data.fog_enabled) { + if (!sc_disable_fog() && scene_data.fog_enabled) { fog = fog_process(vertex); } @@ -1029,7 +998,7 @@ void main() { vec3 vertex_ddx = dFdx(vertex); vec3 vertex_ddy = dFdy(vertex); - if (!sc_disable_decals) { //Decals + if (!sc_disable_decals()) { //Decals // must implement uint decal_indices = instances.data[draw_call.instance_index].decals.x; @@ -1067,7 +1036,7 @@ void main() { if (decals.data[decal_index].albedo_rect != vec4(0.0)) { //has albedo vec4 decal_albedo; - if (sc_decal_use_mipmaps) { + if (sc_decal_use_mipmaps()) { decal_albedo = textureGrad(sampler2D(decal_atlas_srgb, decal_sampler), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, ddx * decals.data[decal_index].albedo_rect.zw, ddy * decals.data[decal_index].albedo_rect.zw); } else { decal_albedo = textureLod(sampler2D(decal_atlas_srgb, decal_sampler), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, 0.0); 
@@ -1078,7 +1047,7 @@ void main() { if (decals.data[decal_index].normal_rect != vec4(0.0)) { vec3 decal_normal; - if (sc_decal_use_mipmaps) { + if (sc_decal_use_mipmaps()) { decal_normal = textureGrad(sampler2D(decal_atlas, decal_sampler), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, ddx * decals.data[decal_index].normal_rect.zw, ddy * decals.data[decal_index].normal_rect.zw).xyz; } else { decal_normal = textureLod(sampler2D(decal_atlas, decal_sampler), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, 0.0).xyz; @@ -1093,7 +1062,7 @@ void main() { if (decals.data[decal_index].orm_rect != vec4(0.0)) { vec3 decal_orm; - if (sc_decal_use_mipmaps) { + if (sc_decal_use_mipmaps()) { decal_orm = textureGrad(sampler2D(decal_atlas, decal_sampler), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, ddx * decals.data[decal_index].orm_rect.zw, ddy * decals.data[decal_index].orm_rect.zw).xyz; } else { decal_orm = textureLod(sampler2D(decal_atlas, decal_sampler), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, 0.0).xyz; @@ -1106,7 +1075,7 @@ void main() { if (decals.data[decal_index].emission_rect != vec4(0.0)) { //emission is additive, so its independent from albedo - if (sc_decal_use_mipmaps) { + if (sc_decal_use_mipmaps()) { emission += textureGrad(sampler2D(decal_atlas_srgb, decal_sampler), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade; } else { emission += textureLod(sampler2D(decal_atlas_srgb, decal_sampler), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, 0.0).xyz * decals.data[decal_index].emission_energy * fade; @@ -1170,7 +1139,7 @@ 
void main() { specular_light = textureLod(samplerCube(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), ref_vec, sqrt(roughness) * MAX_ROUGHNESS_LOD).rgb; #endif //USE_RADIANCE_CUBEMAP_ARRAY - specular_light *= sc_luminance_multiplier; + specular_light *= sc_luminance_multiplier(); specular_light *= scene_data.IBL_exposure_normalization; specular_light *= horizon * horizon; specular_light *= scene_data.ambient_light_color_energy.a; @@ -1192,7 +1161,7 @@ void main() { #else vec3 cubemap_ambient = textureLod(samplerCube(radiance_cubemap, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), ambient_dir, MAX_ROUGHNESS_LOD).rgb; #endif //USE_RADIANCE_CUBEMAP_ARRAY - cubemap_ambient *= sc_luminance_multiplier; + cubemap_ambient *= sc_luminance_multiplier(); cubemap_ambient *= scene_data.IBL_exposure_normalization; ambient_light = mix(ambient_light, cubemap_ambient * scene_data.ambient_light_color_energy.a, scene_data.ambient_color_sky_mix); } @@ -1278,7 +1247,7 @@ void main() { vec3 lm_light_l1_0; vec3 lm_light_l1p1; - if (sc_use_lightmap_bicubic_filter) { + if (sc_use_lightmap_bicubic_filter()) { lm_light_l0 = textureArray_bicubic(lightmap_textures[ofs], uvw + vec3(0.0, 0.0, 0.0), lightmaps.data[ofs].light_texture_size).rgb; lm_light_l1n1 = (textureArray_bicubic(lightmap_textures[ofs], uvw + vec3(0.0, 0.0, 1.0), lightmaps.data[ofs].light_texture_size).rgb - vec3(0.5)) * 2.0; lm_light_l1_0 = (textureArray_bicubic(lightmap_textures[ofs], uvw + vec3(0.0, 0.0, 2.0), lightmaps.data[ofs].light_texture_size).rgb - vec3(0.5)) * 2.0; @@ -1298,7 +1267,7 @@ void main() { ambient_light += lm_light_l1_0 * n.z * (lm_light_l0 * exposure_normalization * 4.0); ambient_light += lm_light_l1p1 * n.x * (lm_light_l0 * exposure_normalization * 4.0); } else { - if (sc_use_lightmap_bicubic_filter) { + if (sc_use_lightmap_bicubic_filter()) { ambient_light += textureArray_bicubic(lightmap_textures[ofs], uvw, lightmaps.data[ofs].light_texture_size).rgb * 
lightmaps.data[ofs].exposure_normalization; } else { ambient_light += textureLod(sampler2DArray(lightmap_textures[ofs], SAMPLER_LINEAR_CLAMP), uvw, 0.0).rgb * lightmaps.data[ofs].exposure_normalization; @@ -1312,7 +1281,7 @@ void main() { // skipping ssao, do we remove ssao totally? - if (!sc_disable_reflection_probes) { //Reflection probes + if (!sc_disable_reflection_probes()) { //Reflection probes vec4 reflection_accum = vec4(0.0, 0.0, 0.0, 0.0); vec4 ambient_accum = vec4(0.0, 0.0, 0.0, 0.0); @@ -1402,7 +1371,7 @@ void main() { // LIGHTING #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) - if (!sc_disable_directional_lights) { //directional light + if (!sc_disable_directional_lights()) { //directional light #ifndef SHADOWS_DISABLED // Do shadow and lighting in two passes to reduce register pressure uint shadow0 = 0; @@ -1753,7 +1722,7 @@ void main() { } } //directional light - if (!sc_disable_omni_lights) { //omni lights + if (!sc_disable_omni_lights()) { //omni lights uint light_indices = instances.data[draw_call.instance_index].omni_lights.x; for (uint i = 0; i < 8; i++) { uint light_index = light_indices & 0xFF; @@ -1797,7 +1766,7 @@ void main() { } } //omni lights - if (!sc_disable_spot_lights) { //spot lights + if (!sc_disable_spot_lights()) { //spot lights uint light_indices = instances.data[draw_call.instance_index].spot_lights.x; for (uint i = 0; i < 8; i++) { @@ -1932,7 +1901,7 @@ void main() { // On mobile we use a UNORM buffer with 10bpp which results in a range from 0.0 - 1.0 resulting in HDR breaking // We divide by sc_luminance_multiplier to support a range from 0.0 - 2.0 both increasing precision on bright and darker images - frag_color.rgb = frag_color.rgb / sc_luminance_multiplier; + frag_color.rgb = frag_color.rgb / sc_luminance_multiplier(); #ifdef PREMUL_ALPHA_USED frag_color.rgb *= premul_alpha; #endif diff --git a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl 
b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl index d971ff04c5eb..495e52a29e4c 100644 --- a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl @@ -20,9 +20,128 @@ layout(push_constant, std430) uniform DrawCall { vec2 uv_offset; uint instance_index; uint pad; +#ifdef UBERSHADER + uint sc_packed_0; + float sc_packed_1; + uint sc_packed_2; + uint uc_packed_0; +#endif } draw_call; +/* Specialization Constants */ + +#ifdef UBERSHADER + +#define POLYGON_CULL_DISABLED 0 +#define POLYGON_CULL_FRONT 1 +#define POLYGON_CULL_BACK 2 + +// Pull the constants from the draw call's push constants. +uint sc_packed_0() { + return draw_call.sc_packed_0; +} + +float sc_packed_1() { + return draw_call.sc_packed_1; +} + +uint uc_cull_mode() { + return (draw_call.uc_packed_0 >> 0) & 3U; +} + +#else + +// Pull the constants from the pipeline's specialization constants. 
+layout(constant_id = 0) const uint pso_sc_packed_0 = 0; +layout(constant_id = 1) const float pso_sc_packed_1 = 2.0; + +uint sc_packed_0() { + return pso_sc_packed_0; +} + +float sc_packed_1() { + return pso_sc_packed_1; +} + +#endif + +bool sc_use_light_projector() { + return ((sc_packed_0() >> 0) & 1U) != 0; +} + +bool sc_use_light_soft_shadows() { + return ((sc_packed_0() >> 1) & 1U) != 0; +} + +bool sc_use_directional_soft_shadows() { + return ((sc_packed_0() >> 2) & 1U) != 0; +} + +bool sc_decal_use_mipmaps() { + return ((sc_packed_0() >> 3) & 1U) != 0; +} + +bool sc_projector_use_mipmaps() { + return ((sc_packed_0() >> 4) & 1U) != 0; +} + +bool sc_disable_omni_lights() { + return ((sc_packed_0() >> 5) & 1U) != 0; +} + +bool sc_disable_spot_lights() { + return ((sc_packed_0() >> 6) & 1U) != 0; +} + +bool sc_disable_reflection_probes() { + return ((sc_packed_0() >> 7) & 1U) != 0; +} + +bool sc_disable_directional_lights() { + return ((sc_packed_0() >> 8) & 1U) != 0; +} + +bool sc_disable_decals() { + return ((sc_packed_0() >> 9) & 1U) != 0; +} + +bool sc_disable_fog() { + return ((sc_packed_0() >> 10) & 1U) != 0; +} + +bool sc_use_depth_fog() { + return ((sc_packed_0() >> 11) & 1U) != 0; +} + +bool sc_is_multimesh() { + return ((sc_packed_0() >> 12) & 1U) != 0; +} + +bool sc_use_lightmap_bicubic_filter() { + return ((sc_packed_0() >> 13) & 1U) != 0; +} + +uint sc_soft_shadow_samples() { + return (sc_packed_0() >> 16) & 15U; +} + +uint sc_penumbra_shadow_samples() { + return (sc_packed_0() >> 20) & 15U; +} + +uint sc_directional_soft_shadow_samples() { + return (sc_packed_0() >> 24) & 15U; +} + +uint sc_directional_penumbra_shadow_samples() { + return (sc_packed_0() >> 28) & 15U; +} + +float sc_luminance_multiplier() { + return sc_packed_1(); +} + /* Set 0: Base Pass (never changes) */ #include "../light_data_inc.glsl" diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl 
b/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl index 14a4dc7089d7..a1a185d0fdf5 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl @@ -269,7 +269,7 @@ float sample_directional_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, ve float depth = coord.z; //if only one sample is taken, take it from the center - if (sc_directional_soft_shadow_samples == 0) { + if (sc_directional_soft_shadow_samples() == 0) { return textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos, depth, 1.0)); } @@ -283,11 +283,11 @@ float sample_directional_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, ve float avg = 0.0; - for (uint i = 0; i < sc_directional_soft_shadow_samples; i++) { + for (uint i = 0; i < sc_directional_soft_shadow_samples(); i++) { avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos + shadow_pixel_size * (disk_rotation * scene_data_block.data.directional_soft_shadow_kernel[i].xy), depth, 1.0)); } - return avg * (1.0 / float(sc_directional_soft_shadow_samples)); + return avg * (1.0 / float(sc_directional_soft_shadow_samples())); } float sample_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec3 coord, float taa_frame_count) { @@ -295,7 +295,7 @@ float sample_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec3 coord, fl float depth = coord.z; //if only one sample is taken, take it from the center - if (sc_soft_shadow_samples == 0) { + if (sc_soft_shadow_samples() == 0) { return textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos, depth, 1.0)); } @@ -309,16 +309,16 @@ float sample_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec3 coord, fl float avg = 0.0; - for (uint i = 0; i < sc_soft_shadow_samples; i++) { + for (uint i = 0; i < sc_soft_shadow_samples(); i++) { avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos + shadow_pixel_size * (disk_rotation * 
scene_data_block.data.soft_shadow_kernel[i].xy), depth, 1.0)); } - return avg * (1.0 / float(sc_soft_shadow_samples)); + return avg * (1.0 / float(sc_soft_shadow_samples())); } float sample_omni_pcf_shadow(texture2D shadow, float blur_scale, vec2 coord, vec4 uv_rect, vec2 flip_offset, float depth, float taa_frame_count) { //if only one sample is taken, take it from the center - if (sc_soft_shadow_samples == 0) { + if (sc_soft_shadow_samples() == 0) { vec2 pos = coord * 0.5 + 0.5; pos = uv_rect.xy + pos * uv_rect.zw; return textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos, depth, 1.0)); @@ -335,7 +335,7 @@ float sample_omni_pcf_shadow(texture2D shadow, float blur_scale, vec2 coord, vec float avg = 0.0; vec2 offset_scale = blur_scale * 2.0 * scene_data_block.data.shadow_atlas_pixel_size / uv_rect.zw; - for (uint i = 0; i < sc_soft_shadow_samples; i++) { + for (uint i = 0; i < sc_soft_shadow_samples(); i++) { vec2 offset = offset_scale * (disk_rotation * scene_data_block.data.soft_shadow_kernel[i].xy); vec2 sample_coord = coord + offset; @@ -356,7 +356,7 @@ float sample_omni_pcf_shadow(texture2D shadow, float blur_scale, vec2 coord, vec avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(sample_coord, depth, 1.0)); } - return avg * (1.0 / float(sc_soft_shadow_samples)); + return avg * (1.0 / float(sc_soft_shadow_samples())); } float sample_directional_soft_shadow(texture2D shadow, vec3 pssm_coord, vec2 tex_scale, float taa_frame_count) { @@ -372,7 +372,7 @@ float sample_directional_soft_shadow(texture2D shadow, vec3 pssm_coord, vec2 tex disk_rotation = mat2(vec2(cr, -sr), vec2(sr, cr)); } - for (uint i = 0; i < sc_directional_penumbra_shadow_samples; i++) { + for (uint i = 0; i < sc_directional_penumbra_shadow_samples(); i++) { vec2 suv = pssm_coord.xy + (disk_rotation * scene_data_block.data.directional_penumbra_shadow_kernel[i].xy) * tex_scale; float d = textureLod(sampler2D(shadow, SAMPLER_LINEAR_CLAMP), suv, 0.0).r; if (d > pssm_coord.z) 
{ @@ -388,12 +388,12 @@ float sample_directional_soft_shadow(texture2D shadow, vec3 pssm_coord, vec2 tex tex_scale *= penumbra; float s = 0.0; - for (uint i = 0; i < sc_directional_penumbra_shadow_samples; i++) { + for (uint i = 0; i < sc_directional_penumbra_shadow_samples(); i++) { vec2 suv = pssm_coord.xy + (disk_rotation * scene_data_block.data.directional_penumbra_shadow_kernel[i].xy) * tex_scale; s += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(suv, pssm_coord.z, 1.0)); } - return s / float(sc_directional_penumbra_shadow_samples); + return s / float(sc_directional_penumbra_shadow_samples()); } else { //no blockers found, so no shadow @@ -434,7 +434,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal, float taa_fr float shadow; - if (sc_use_light_soft_shadows && omni_lights.data[idx].soft_shadow_size > 0.0) { + if (sc_use_light_soft_shadows() && omni_lights.data[idx].soft_shadow_size > 0.0) { //soft shadow //find blocker @@ -459,7 +459,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal, float taa_fr tangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale; bitangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale; - for (uint i = 0; i < sc_penumbra_shadow_samples; i++) { + for (uint i = 0; i < sc_penumbra_shadow_samples(); i++) { vec2 disk = disk_rotation * scene_data_block.data.penumbra_shadow_kernel[i].xy; vec3 pos = local_vert + tangent * disk.x + bitangent * disk.y; @@ -495,7 +495,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal, float taa_fr z_norm += omni_lights.data[idx].inv_radius * omni_lights.data[idx].shadow_bias; shadow = 0.0; - for (uint i = 0; i < sc_penumbra_shadow_samples; i++) { + for (uint i = 0; i < sc_penumbra_shadow_samples(); i++) { vec2 disk = disk_rotation * scene_data_block.data.penumbra_shadow_kernel[i].xy; vec3 pos = local_vert + tangent * disk.x + bitangent * disk.y; @@ -516,7 +516,7 @@ 
float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal, float taa_fr shadow += textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(pos.xy, z_norm, 1.0)); } - shadow /= float(sc_penumbra_shadow_samples); + shadow /= float(sc_penumbra_shadow_samples()); shadow = mix(1.0, shadow, omni_lights.data[idx].shadow_opacity); } else { @@ -574,7 +574,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v float size_A = 0.0; - if (sc_use_light_soft_shadows && omni_lights.data[idx].size > 0.0) { + if (sc_use_light_soft_shadows() && omni_lights.data[idx].size > 0.0) { float t = omni_lights.data[idx].size / max(0.001, light_length); size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); } @@ -616,7 +616,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v #endif // !SHADOWS_DISABLED #endif // LIGHT_TRANSMITTANCE_USED - if (sc_use_light_projector && omni_lights.data[idx].projector_rect != vec4(0.0)) { + if (sc_use_light_projector() && omni_lights.data[idx].projector_rect != vec4(0.0)) { vec3 local_v = (omni_lights.data[idx].shadow_matrix * vec4(vertex, 1.0)).xyz; local_v = normalize(local_v); @@ -632,7 +632,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v local_v.xy = local_v.xy * 0.5 + 0.5; vec2 proj_uv = local_v.xy * atlas_rect.zw; - if (sc_projector_use_mipmaps) { + if (sc_projector_use_mipmaps()) { vec2 proj_uv_ddx; vec2 proj_uv_ddy; { @@ -716,7 +716,7 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal, float taa_fr splane /= splane.w; float shadow; - if (sc_use_light_soft_shadows && spot_lights.data[idx].soft_shadow_size > 0.0) { + if (sc_use_light_soft_shadows() && spot_lights.data[idx].soft_shadow_size > 0.0) { //soft shadow //find blocker @@ -737,7 +737,7 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal, float taa_fr float uv_size = spot_lights.data[idx].soft_shadow_size * z_norm * 
spot_lights.data[idx].soft_shadow_scale; vec2 clamp_max = spot_lights.data[idx].atlas_rect.xy + spot_lights.data[idx].atlas_rect.zw; - for (uint i = 0; i < sc_penumbra_shadow_samples; i++) { + for (uint i = 0; i < sc_penumbra_shadow_samples(); i++) { vec2 suv = shadow_uv + (disk_rotation * scene_data_block.data.penumbra_shadow_kernel[i].xy) * uv_size; suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max); float d = textureLod(sampler2D(shadow_atlas, SAMPLER_LINEAR_CLAMP), suv, 0.0).r; @@ -754,13 +754,13 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal, float taa_fr uv_size *= penumbra; shadow = 0.0; - for (uint i = 0; i < sc_penumbra_shadow_samples; i++) { + for (uint i = 0; i < sc_penumbra_shadow_samples(); i++) { vec2 suv = shadow_uv + (disk_rotation * scene_data_block.data.penumbra_shadow_kernel[i].xy) * uv_size; suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max); shadow += textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(suv, splane.z, 1.0)); } - shadow /= float(sc_penumbra_shadow_samples); + shadow /= float(sc_penumbra_shadow_samples()); shadow = mix(1.0, shadow, spot_lights.data[idx].shadow_opacity); } else { @@ -831,7 +831,7 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v float size_A = 0.0; - if (sc_use_light_soft_shadows && spot_lights.data[idx].size > 0.0) { + if (sc_use_light_soft_shadows() && spot_lights.data[idx].size > 0.0) { float t = spot_lights.data[idx].size / max(0.001, light_length); size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t)); } @@ -859,13 +859,13 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v #endif // !SHADOWS_DISABLED #endif // LIGHT_TRANSMITTANCE_USED - if (sc_use_light_projector && spot_lights.data[idx].projector_rect != vec4(0.0)) { + if (sc_use_light_projector() && spot_lights.data[idx].projector_rect != vec4(0.0)) { vec4 splane = (spot_lights.data[idx].shadow_matrix * vec4(vertex, 1.0)); splane /= 
splane.w; vec2 proj_uv = splane.xy * spot_lights.data[idx].projector_rect.zw; - if (sc_projector_use_mipmaps) { + if (sc_projector_use_mipmaps()) { //ensure we have proper mipmaps vec4 splane_ddx = (spot_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)); splane_ddx /= splane_ddx.w; @@ -940,7 +940,7 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 ref_vec, vec3 normal, vec4 reflection; - reflection.rgb = textureLod(samplerCubeArray(reflection_atlas, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(local_ref_vec, reflections.data[ref_index].index), sqrt(roughness) * MAX_ROUGHNESS_LOD).rgb * sc_luminance_multiplier; + reflection.rgb = textureLod(samplerCubeArray(reflection_atlas, DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP), vec4(local_ref_vec, reflections.data[ref_index].index), sqrt(roughness) * MAX_ROUGHNESS_LOD).rgb * sc_luminance_multiplier(); reflection.rgb *= reflections.data[ref_index].exposure_normalization; if (reflections.data[ref_index].exterior) { reflection.rgb = mix(specular_light, reflection.rgb, blend); diff --git a/servers/rendering/renderer_rd/storage_rd/light_storage.cpp b/servers/rendering/renderer_rd/storage_rd/light_storage.cpp index b07063cfda28..8f7190915471 100644 --- a/servers/rendering/renderer_rd/storage_rd/light_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/light_storage.cpp @@ -313,6 +313,12 @@ void LightStorage::light_omni_set_shadow_mode(RID p_light, RS::LightOmniShadowMo light->version++; light->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_LIGHT); + + if (p_mode == RS::LIGHT_OMNI_SHADOW_DUAL_PARABOLOID) { + shadow_dual_paraboloid_used = true; + } else if (p_mode == RS::LIGHT_OMNI_SHADOW_CUBE) { + shadow_cubemaps_used = true; + } } RS::LightOmniShadowMode LightStorage::light_omni_get_shadow_mode(RID p_light) { @@ -1478,21 +1484,20 @@ bool LightStorage::reflection_probe_instance_begin_render(RID p_instance, RID p_ //reflection atlas was unused, create: RD::TextureFormat tf; 
tf.array_layers = 6 * atlas->count; - tf.format = RendererSceneRenderRD::get_singleton()->_render_buffers_get_color_format(); + tf.format = get_reflection_probe_color_format(); tf.texture_type = RD::TEXTURE_TYPE_CUBE_ARRAY; tf.mipmaps = mipmaps; tf.width = atlas->size; tf.height = atlas->size; - tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | (RendererSceneRenderRD::get_singleton()->_render_buffers_can_be_storage() ? RD::TEXTURE_USAGE_STORAGE_BIT : 0); - + tf.usage_bits = get_reflection_probe_color_usage_bits(); atlas->reflection = RD::get_singleton()->texture_create(tf, RD::TextureView()); } { RD::TextureFormat tf; - tf.format = RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D32_SFLOAT, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? RD::DATA_FORMAT_D32_SFLOAT : RD::DATA_FORMAT_X8_D24_UNORM_PACK32; + tf.format = get_reflection_probe_depth_format(); tf.width = atlas->size; tf.height = atlas->size; - tf.usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; + tf.usage_bits = get_reflection_probe_depth_usage_bits(); atlas->depth_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); } atlas->reflections.resize(atlas->count); @@ -1763,6 +1768,22 @@ void LightStorage::update_reflection_probe_buffer(RenderDataRD *p_render_data, c } } +RD::DataFormat LightStorage::get_reflection_probe_color_format() { + return RendererSceneRenderRD::get_singleton()->_render_buffers_get_color_format(); +} + +uint32_t LightStorage::get_reflection_probe_color_usage_bits() { + return RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | (RendererSceneRenderRD::get_singleton()->_render_buffers_can_be_storage() ? 
RD::TEXTURE_USAGE_STORAGE_BIT : 0); +} + +RD::DataFormat LightStorage::get_reflection_probe_depth_format() { + return RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D32_SFLOAT, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? RD::DATA_FORMAT_D32_SFLOAT : RD::DATA_FORMAT_X8_D24_UNORM_PACK32; +} + +uint32_t LightStorage::get_reflection_probe_depth_usage_bits() { + return RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; +} + /* LIGHTMAP API */ RID LightStorage::lightmap_allocate() { @@ -1996,10 +2017,10 @@ void LightStorage::shadow_atlas_free(RID p_atlas) { void LightStorage::_update_shadow_atlas(ShadowAtlas *shadow_atlas) { if (shadow_atlas->size > 0 && shadow_atlas->depth.is_null()) { RD::TextureFormat tf; - tf.format = shadow_atlas->use_16_bits ? RD::DATA_FORMAT_D16_UNORM : RD::DATA_FORMAT_D32_SFLOAT; + tf.format = get_shadow_atlas_depth_format(shadow_atlas->use_16_bits); tf.width = shadow_atlas->size; tf.height = shadow_atlas->size; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + tf.usage_bits = get_shadow_atlas_depth_usage_bits(); shadow_atlas->depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); Vector fb_tex; @@ -2384,15 +2405,23 @@ void LightStorage::shadow_atlas_update(RID p_atlas) { _update_shadow_atlas(shadow_atlas); } +RD::DataFormat LightStorage::get_shadow_atlas_depth_format(bool p_16_bits) { + return p_16_bits ? RD::DATA_FORMAT_D16_UNORM : RD::DATA_FORMAT_D32_SFLOAT; +} + +uint32_t LightStorage::get_shadow_atlas_depth_usage_bits() { + return RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; +} + /* DIRECTIONAL SHADOW */ void LightStorage::update_directional_shadow_atlas() { if (directional_shadow.depth.is_null() && directional_shadow.size > 0) { RD::TextureFormat tf; - tf.format = directional_shadow.use_16_bits ? 
RD::DATA_FORMAT_D16_UNORM : RD::DATA_FORMAT_D32_SFLOAT; + tf.format = get_shadow_atlas_depth_format(directional_shadow.use_16_bits); tf.width = directional_shadow.size; tf.height = directional_shadow.size; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + tf.usage_bits = get_shadow_atlas_depth_usage_bits(); directional_shadow.depth = RD::get_singleton()->texture_create(tf, RD::TextureView()); Vector fb_tex; @@ -2477,12 +2506,12 @@ LightStorage::ShadowCubemap *LightStorage::_get_shadow_cubemap(int p_size) { ShadowCubemap sc; { RD::TextureFormat tf; - tf.format = RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D32_SFLOAT, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? RD::DATA_FORMAT_D32_SFLOAT : RD::DATA_FORMAT_X8_D24_UNORM_PACK32; + tf.format = get_cubemap_depth_format(); tf.width = p_size; tf.height = p_size; tf.texture_type = RD::TEXTURE_TYPE_CUBE; tf.array_layers = 6; - tf.usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; + tf.usage_bits = get_cubemap_depth_usage_bits(); sc.cubemap = RD::get_singleton()->texture_create(tf, RD::TextureView()); } @@ -2510,3 +2539,19 @@ RID LightStorage::get_cubemap_fb(int p_size, int p_pass) { return cubemap->side_fb[p_pass]; } + +RD::DataFormat LightStorage::get_cubemap_depth_format() { + return RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D32_SFLOAT, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? 
RD::DATA_FORMAT_D32_SFLOAT : RD::DATA_FORMAT_X8_D24_UNORM_PACK32; +} + +uint32_t LightStorage::get_cubemap_depth_usage_bits() { + return RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; +} + +bool LightStorage::get_shadow_cubemaps_used() const { + return shadow_cubemaps_used; +} + +bool LightStorage::get_shadow_dual_paraboloid_used() const { + return shadow_dual_paraboloid_used; +} diff --git a/servers/rendering/renderer_rd/storage_rd/light_storage.h b/servers/rendering/renderer_rd/storage_rd/light_storage.h index 1db58d72f9d1..59303e8a73ca 100644 --- a/servers/rendering/renderer_rd/storage_rd/light_storage.h +++ b/servers/rendering/renderer_rd/storage_rd/light_storage.h @@ -434,6 +434,11 @@ class LightStorage : public RendererLightStorage { HashMap shadow_cubemaps; ShadowCubemap *_get_shadow_cubemap(int p_size); + /* PIPELINE HINTS */ + + bool shadow_cubemaps_used = false; + bool shadow_dual_paraboloid_used = false; + public: static LightStorage *get_singleton(); @@ -938,6 +943,10 @@ class LightStorage : public RendererLightStorage { void set_max_reflection_probes(const uint32_t p_max_reflection_probes); RID get_reflection_probe_buffer() { return reflection_buffer; } void update_reflection_probe_buffer(RenderDataRD *p_render_data, const PagedArray &p_reflections, const Transform3D &p_camera_inverse_transform, RID p_environment); + static RD::DataFormat get_reflection_probe_color_format(); + static uint32_t get_reflection_probe_color_usage_bits(); + static RD::DataFormat get_reflection_probe_depth_format(); + static uint32_t get_reflection_probe_depth_usage_bits(); /* LIGHTMAP */ @@ -1079,6 +1088,8 @@ class LightStorage : public RendererLightStorage { } virtual void shadow_atlas_update(RID p_atlas) override; + static RD::DataFormat get_shadow_atlas_depth_format(bool p_16_bits); + static uint32_t get_shadow_atlas_depth_usage_bits(); /* DIRECTIONAL SHADOW */ @@ -1109,6 +1120,13 @@ class LightStorage : public RendererLightStorage { RID 
get_cubemap(int p_size); RID get_cubemap_fb(int p_size, int p_pass); + static RD::DataFormat get_cubemap_depth_format(); + static uint32_t get_cubemap_depth_usage_bits(); + + /* PIPELINE HINTS */ + + bool get_shadow_cubemaps_used() const; + bool get_shadow_dual_paraboloid_used() const; }; } // namespace RendererRD diff --git a/servers/rendering/renderer_rd/storage_rd/material_storage.cpp b/servers/rendering/renderer_rd/storage_rd/material_storage.cpp index 3bfc1bd15ca1..137a318707aa 100644 --- a/servers/rendering/renderer_rd/storage_rd/material_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/material_storage.cpp @@ -633,6 +633,93 @@ bool MaterialStorage::ShaderData::is_parameter_texture(const StringName &p_param return uniforms[p_param].is_texture(); } +RD::PipelineColorBlendState::Attachment MaterialStorage::ShaderData::blend_mode_to_blend_attachment(BlendMode p_mode) { + RD::PipelineColorBlendState::Attachment attachment; + + switch (p_mode) { + case BLEND_MODE_MIX: { + attachment.enable_blend = true; + attachment.alpha_blend_op = RD::BLEND_OP_ADD; + attachment.color_blend_op = RD::BLEND_OP_ADD; + attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + } break; + case BLEND_MODE_ADD: { + attachment.enable_blend = true; + attachment.alpha_blend_op = RD::BLEND_OP_ADD; + attachment.color_blend_op = RD::BLEND_OP_ADD; + attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE; + attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + } break; + case BLEND_MODE_SUB: { + attachment.enable_blend = true; + attachment.alpha_blend_op = RD::BLEND_OP_REVERSE_SUBTRACT; + attachment.color_blend_op = 
RD::BLEND_OP_REVERSE_SUBTRACT; + attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE; + attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + } break; + case BLEND_MODE_MUL: { + attachment.enable_blend = true; + attachment.alpha_blend_op = RD::BLEND_OP_ADD; + attachment.color_blend_op = RD::BLEND_OP_ADD; + attachment.src_color_blend_factor = RD::BLEND_FACTOR_DST_COLOR; + attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ZERO; + attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_DST_ALPHA; + attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ZERO; + } break; + case BLEND_MODE_ALPHA_TO_COVERAGE: { + attachment.enable_blend = true; + attachment.alpha_blend_op = RD::BLEND_OP_ADD; + attachment.color_blend_op = RD::BLEND_OP_ADD; + attachment.src_color_blend_factor = RD::BLEND_FACTOR_SRC_ALPHA; + attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ZERO; + } break; + case BLEND_MODE_PREMULTIPLIED_ALPHA: { + attachment.enable_blend = true; + attachment.alpha_blend_op = RD::BLEND_OP_ADD; + attachment.color_blend_op = RD::BLEND_OP_ADD; + attachment.src_color_blend_factor = RD::BLEND_FACTOR_ONE; + attachment.dst_color_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + attachment.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + attachment.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + } break; + case BLEND_MODE_DISABLED: + default: { + // Use default attachment values. 
+ } break; + } + + return attachment; +} + +bool MaterialStorage::ShaderData::blend_mode_uses_blend_alpha(BlendMode p_mode) { + switch (p_mode) { + case BLEND_MODE_MIX: + return false; + case BLEND_MODE_ADD: + return true; + case BLEND_MODE_SUB: + return true; + case BLEND_MODE_MUL: + return true; + case BLEND_MODE_ALPHA_TO_COVERAGE: + return false; + case BLEND_MODE_PREMULTIPLIED_ALPHA: + return true; + case BLEND_MODE_DISABLED: + default: + return false; + } +} + /////////////////////////////////////////////////////////////////////////// // MaterialStorage::MaterialData @@ -1183,6 +1270,10 @@ MaterialStorage::~MaterialStorage() { singleton = nullptr; } +bool MaterialStorage::can_create_resources_async() const { + return true; +} + bool MaterialStorage::free(RID p_rid) { if (owns_shader(p_rid)) { shader_free(p_rid); @@ -2021,6 +2112,7 @@ void MaterialStorage::_material_uniform_set_erased(void *p_material) { } void MaterialStorage::_material_queue_update(Material *material, bool p_uniform, bool p_texture) { + MutexLock lock(material_update_list_mutex); material->uniform_dirty = material->uniform_dirty || p_uniform; material->texture_dirty = material->texture_dirty || p_texture; @@ -2032,6 +2124,7 @@ void MaterialStorage::_material_queue_update(Material *material, bool p_uniform, } void MaterialStorage::_update_queued_materials() { + MutexLock lock(material_update_list_mutex); while (material_update_list.first()) { Material *material = material_update_list.first()->self(); bool uniforms_changed = false; diff --git a/servers/rendering/renderer_rd/storage_rd/material_storage.h b/servers/rendering/renderer_rd/storage_rd/material_storage.h index 9c53450462ef..3d53a65fadc6 100644 --- a/servers/rendering/renderer_rd/storage_rd/material_storage.h +++ b/servers/rendering/renderer_rd/storage_rd/material_storage.h @@ -56,6 +56,16 @@ class MaterialStorage : public RendererMaterialStorage { }; struct ShaderData { + enum BlendMode { + BLEND_MODE_MIX, + BLEND_MODE_ADD, + 
BLEND_MODE_SUB, + BLEND_MODE_MUL, + BLEND_MODE_ALPHA_TO_COVERAGE, + BLEND_MODE_PREMULTIPLIED_ALPHA, + BLEND_MODE_DISABLED + }; + String path; HashMap uniforms; HashMap> default_texture_params; @@ -73,6 +83,9 @@ class MaterialStorage : public RendererMaterialStorage { virtual RS::ShaderNativeSourceCode get_native_source_code() const { return RS::ShaderNativeSourceCode(); } virtual ~ShaderData() {} + + static RD::PipelineColorBlendState::Attachment blend_mode_to_blend_attachment(BlendMode p_mode); + static bool blend_mode_uses_blend_alpha(BlendMode p_mode); }; struct MaterialData { @@ -244,6 +257,7 @@ class MaterialStorage : public RendererMaterialStorage { Material *get_material(RID p_rid) { return material_owner.get_or_null(p_rid); }; SelfList::List material_update_list; + Mutex material_update_list_mutex; static void _material_uniform_set_erased(void *p_material); @@ -253,6 +267,8 @@ class MaterialStorage : public RendererMaterialStorage { MaterialStorage(); virtual ~MaterialStorage(); + virtual bool can_create_resources_async() const override; + bool free(RID p_rid); /* Helpers */ diff --git a/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp b/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp index 9ae39691dc28..2fa21ed56f24 100644 --- a/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp @@ -1142,97 +1142,71 @@ void MeshStorage::update_mesh_instances() { RD::get_singleton()->compute_list_end(); } -void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint64_t p_input_mask, bool p_input_motion_vectors, MeshInstance::Surface *mis, uint32_t p_current_buffer, uint32_t p_previous_buffer) { +RD::VertexFormatID MeshStorage::_mesh_surface_generate_vertex_format(uint64_t p_surface_format, uint64_t p_input_mask, bool p_instanced_surface, bool p_input_motion_vectors, uint32_t &r_position_stride) { Vector attributes; - Vector buffers; 
- Vector offsets; - - uint32_t position_stride = 0; uint32_t normal_tangent_stride = 0; uint32_t attribute_stride = 0; uint32_t skin_stride = 0; + RD::VertexAttribute vd; + + r_position_stride = 0; for (int i = 0; i < RS::ARRAY_INDEX; i++) { - RD::VertexAttribute vd; - RID buffer; vd.location = i; - uint64_t offset = 0; - if (!(s->format & (1ULL << i))) { - // Not supplied by surface, use default value - buffer = mesh_default_rd_buffers[i]; + if (!(p_surface_format & (1ULL << i))) { vd.stride = 0; switch (i) { - case RS::ARRAY_VERTEX: { - vd.format = RD::DATA_FORMAT_R32G32B32_SFLOAT; - - } break; - case RS::ARRAY_NORMAL: { + case RS::ARRAY_VERTEX: + case RS::ARRAY_NORMAL: vd.format = RD::DATA_FORMAT_R32G32B32_SFLOAT; - } break; - case RS::ARRAY_TANGENT: { - vd.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; - } break; - case RS::ARRAY_COLOR: { - vd.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; - - } break; - case RS::ARRAY_TEX_UV: { - vd.format = RD::DATA_FORMAT_R32G32_SFLOAT; - - } break; - case RS::ARRAY_TEX_UV2: { + break; + case RS::ARRAY_TEX_UV: + case RS::ARRAY_TEX_UV2: vd.format = RD::DATA_FORMAT_R32G32_SFLOAT; - } break; + break; + case RS::ARRAY_BONES: + vd.format = RD::DATA_FORMAT_R32G32B32A32_UINT; + break; + case RS::ARRAY_TANGENT: + case RS::ARRAY_COLOR: case RS::ARRAY_CUSTOM0: case RS::ARRAY_CUSTOM1: case RS::ARRAY_CUSTOM2: - case RS::ARRAY_CUSTOM3: { - //assumed weights too - vd.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; - } break; - case RS::ARRAY_BONES: { - //assumed weights too - vd.format = RD::DATA_FORMAT_R32G32B32A32_UINT; - } break; - case RS::ARRAY_WEIGHTS: { - //assumed weights too + case RS::ARRAY_CUSTOM3: + case RS::ARRAY_WEIGHTS: vd.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; - } break; + break; + default: + DEV_ASSERT(false && "Unknown vertex format element."); + break; } } else { - //Supplied, use it - - vd.stride = 1; //mark that it needs a stride set (default uses 0) + // Mark that it needs a stride set (default uses 0). 
+ vd.stride = 1; switch (i) { case RS::ARRAY_VERTEX: { - vd.offset = position_stride; + vd.offset = r_position_stride; - if (s->format & RS::ARRAY_FLAG_USE_2D_VERTICES) { + if (p_surface_format & RS::ARRAY_FLAG_USE_2D_VERTICES) { vd.format = RD::DATA_FORMAT_R32G32_SFLOAT; - position_stride = sizeof(float) * 2; + r_position_stride = sizeof(float) * 2; } else { - if (!mis && (s->format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES)) { + if (!p_instanced_surface && (p_surface_format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES)) { vd.format = RD::DATA_FORMAT_R16G16B16A16_UNORM; - position_stride = sizeof(uint16_t) * 4; + r_position_stride = sizeof(uint16_t) * 4; } else { vd.format = RD::DATA_FORMAT_R32G32B32_SFLOAT; - position_stride = sizeof(float) * 3; + r_position_stride = sizeof(float) * 3; } } - if (mis) { - buffer = mis->vertex_buffer[p_current_buffer]; - } else { - buffer = s->vertex_buffer; - } - } break; case RS::ARRAY_NORMAL: { vd.offset = 0; - offset = position_stride * s->vertex_count; - if (!mis && (s->format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES)) { + + if (!p_instanced_surface && (p_surface_format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES)) { vd.format = RD::DATA_FORMAT_R16G16_UNORM; normal_tangent_stride += sizeof(uint16_t) * 2; } else { @@ -1240,20 +1214,14 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V // A small trick here: if we are uncompressed and we have normals, but no tangents. We need // the shader to think there are 4 components to "axis_tangent_attrib". So we give a size of 4, // but a stride based on only having 2 elements. 
- if (!(s->format & RS::ARRAY_FORMAT_TANGENT)) { + if (!(p_surface_format & RS::ARRAY_FORMAT_TANGENT)) { normal_tangent_stride += sizeof(uint16_t) * 2; } else { normal_tangent_stride += sizeof(uint16_t) * 4; } } - if (mis) { - buffer = mis->vertex_buffer[p_current_buffer]; - } else { - buffer = s->vertex_buffer; - } } break; case RS::ARRAY_TANGENT: { - buffer = mesh_default_rd_buffers[i]; vd.stride = 0; vd.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; } break; @@ -1262,30 +1230,27 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V vd.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; attribute_stride += sizeof(int8_t) * 4; - buffer = s->attribute_buffer; } break; case RS::ARRAY_TEX_UV: { vd.offset = attribute_stride; - if (s->format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES) { + if (p_surface_format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES) { vd.format = RD::DATA_FORMAT_R16G16_UNORM; attribute_stride += sizeof(uint16_t) * 2; } else { vd.format = RD::DATA_FORMAT_R32G32_SFLOAT; attribute_stride += sizeof(float) * 2; } - buffer = s->attribute_buffer; } break; case RS::ARRAY_TEX_UV2: { vd.offset = attribute_stride; - if (s->format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES) { + if (p_surface_format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES) { vd.format = RD::DATA_FORMAT_R16G16_UNORM; attribute_stride += sizeof(uint16_t) * 2; } else { vd.format = RD::DATA_FORMAT_R32G32_SFLOAT; attribute_stride += sizeof(float) * 2; } - buffer = s->attribute_buffer; } break; case RS::ARRAY_CUSTOM0: case RS::ARRAY_CUSTOM1: @@ -1295,26 +1260,23 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V int idx = i - RS::ARRAY_CUSTOM0; const uint32_t fmt_shift[RS::ARRAY_CUSTOM_COUNT] = { RS::ARRAY_FORMAT_CUSTOM0_SHIFT, RS::ARRAY_FORMAT_CUSTOM1_SHIFT, RS::ARRAY_FORMAT_CUSTOM2_SHIFT, RS::ARRAY_FORMAT_CUSTOM3_SHIFT }; - uint32_t fmt = (s->format >> fmt_shift[idx]) & RS::ARRAY_FORMAT_CUSTOM_MASK; + uint32_t fmt = (p_surface_format >> fmt_shift[idx]) & 
RS::ARRAY_FORMAT_CUSTOM_MASK; const uint32_t fmtsize[RS::ARRAY_CUSTOM_MAX] = { 4, 4, 4, 8, 4, 8, 12, 16 }; const RD::DataFormat fmtrd[RS::ARRAY_CUSTOM_MAX] = { RD::DATA_FORMAT_R8G8B8A8_UNORM, RD::DATA_FORMAT_R8G8B8A8_SNORM, RD::DATA_FORMAT_R16G16_SFLOAT, RD::DATA_FORMAT_R16G16B16A16_SFLOAT, RD::DATA_FORMAT_R32_SFLOAT, RD::DATA_FORMAT_R32G32_SFLOAT, RD::DATA_FORMAT_R32G32B32_SFLOAT, RD::DATA_FORMAT_R32G32B32A32_SFLOAT }; vd.format = fmtrd[fmt]; attribute_stride += fmtsize[fmt]; - buffer = s->attribute_buffer; } break; case RS::ARRAY_BONES: { vd.offset = skin_stride; vd.format = RD::DATA_FORMAT_R16G16B16A16_UINT; skin_stride += sizeof(int16_t) * 4; - buffer = s->skin_buffer; } break; case RS::ARRAY_WEIGHTS: { vd.offset = skin_stride; vd.format = RD::DATA_FORMAT_R16G16B16A16_UNORM; skin_stride += sizeof(int16_t) * 4; - buffer = s->skin_buffer; } break; } } @@ -1324,13 +1286,10 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V } attributes.push_back(vd); - buffers.push_back(buffer); - offsets.push_back(offset); if (p_input_motion_vectors) { - // Since the previous vertex, normal and tangent can't be part of the vertex format but they are required when motion - // vectors are enabled, we opt to push a copy of the vertex attribute with a different location and buffer (if it's - // part of an instance that has one). + // Since the previous vertex, normal and tangent can't be part of the vertex format but they are required when + // motion vectors are enabled, we opt to push a copy of the vertex attribute with a different location. 
switch (i) { case RS::ARRAY_VERTEX: { vd.location = ATTRIBUTE_LOCATION_PREV_VERTEX; @@ -1344,25 +1303,21 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V } if (int(vd.location) != i) { - if (mis && buffer != mesh_default_rd_buffers[i]) { - buffer = mis->vertex_buffer[p_previous_buffer]; - } - attributes.push_back(vd); - buffers.push_back(buffer); - offsets.push_back(offset); } } } - //update final stride + // Update final stride. for (int i = 0; i < attributes.size(); i++) { if (attributes[i].stride == 0) { - continue; //default location + // Default location. + continue; } + int loc = attributes[i].location; if (loc == RS::ARRAY_VERTEX || loc == ATTRIBUTE_LOCATION_PREV_VERTEX) { - attributes.write[i].stride = position_stride; + attributes.write[i].stride = r_position_stride; } else if ((loc < RS::ARRAY_COLOR) || ((loc >= ATTRIBUTE_LOCATION_PREV_NORMAL) && (loc <= ATTRIBUTE_LOCATION_PREV_TANGENT))) { attributes.write[i].stride = normal_tangent_stride; } else if (loc < RS::ARRAY_BONES) { @@ -1372,11 +1327,75 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V } } + return RD::get_singleton()->vertex_format_create(attributes); +} + +void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint64_t p_input_mask, bool p_input_motion_vectors, MeshInstance::Surface *mis, uint32_t p_current_buffer, uint32_t p_previous_buffer) { + uint32_t position_stride = 0; + v.vertex_format = _mesh_surface_generate_vertex_format(s->format, p_input_mask, mis != nullptr, p_input_motion_vectors, position_stride); + + Vector buffers; + Vector offsets; + RID buffer; + uint64_t offset = 0; + for (int i = 0; i < RS::ARRAY_INDEX; i++) { + offset = 0; + + if (!(s->format & (1ULL << i))) { + // Not supplied by surface, use default buffers. + buffer = mesh_default_rd_buffers[i]; + } else { + // Supplied by surface, use buffer. 
+ switch (i) { + case RS::ARRAY_VERTEX: + case RS::ARRAY_NORMAL: + offset = i == RS::ARRAY_NORMAL ? position_stride * s->vertex_count : 0; + buffer = mis != nullptr ? mis->vertex_buffer[p_current_buffer] : s->vertex_buffer; + break; + case RS::ARRAY_TANGENT: + buffer = mesh_default_rd_buffers[i]; + break; + case RS::ARRAY_COLOR: + case RS::ARRAY_TEX_UV: + case RS::ARRAY_TEX_UV2: + case RS::ARRAY_CUSTOM0: + case RS::ARRAY_CUSTOM1: + case RS::ARRAY_CUSTOM2: + case RS::ARRAY_CUSTOM3: + buffer = s->attribute_buffer; + break; + case RS::ARRAY_BONES: + case RS::ARRAY_WEIGHTS: + buffer = s->skin_buffer; + break; + } + } + + if (!(p_input_mask & (1ULL << i))) { + continue; // Shader does not need this, skip it (but computing stride was important anyway) + } + + buffers.push_back(buffer); + offsets.push_back(offset); + + if (p_input_motion_vectors) { + // Push the buffer for motion vector inputs. + if (i == RS::ARRAY_VERTEX || i == RS::ARRAY_NORMAL || i == RS::ARRAY_TANGENT) { + if (mis && buffer != mesh_default_rd_buffers[i]) { + buffers.push_back(mis->vertex_buffer[p_previous_buffer]); + } else { + buffers.push_back(buffer); + } + + offsets.push_back(offset); + } + } + } + v.input_mask = p_input_mask; v.current_buffer = p_current_buffer; v.previous_buffer = p_previous_buffer; v.input_motion_vectors = p_input_motion_vectors; - v.vertex_format = RD::get_singleton()->vertex_format_create(attributes); v.vertex_array = RD::get_singleton()->vertex_array_create(s->vertex_count, v.vertex_format, buffers, offsets); } diff --git a/servers/rendering/renderer_rd/storage_rd/mesh_storage.h b/servers/rendering/renderer_rd/storage_rd/mesh_storage.h index f811314fb66d..6784520d17e3 100644 --- a/servers/rendering/renderer_rd/storage_rd/mesh_storage.h +++ b/servers/rendering/renderer_rd/storage_rd/mesh_storage.h @@ -199,6 +199,7 @@ class MeshStorage : public RendererMeshStorage { weight_update_list(this), array_update_list(this) {} }; + RD::VertexFormatID 
_mesh_surface_generate_vertex_format(uint64_t p_surface_format, uint64_t p_input_mask, bool p_instanced_surface, bool p_input_motion_vectors, uint32_t &r_position_stride); void _mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint64_t p_input_mask, bool p_input_motion_vectors, MeshInstance::Surface *mis = nullptr, uint32_t p_current_buffer = 0, uint32_t p_previous_buffer = 0); void _mesh_instance_clear(MeshInstance *mi); @@ -605,6 +606,12 @@ class MeshStorage : public RendererMeshStorage { return s->particles_render_index; } + _FORCE_INLINE_ RD::VertexFormatID mesh_surface_get_vertex_format(void *p_surface, uint64_t p_input_mask, bool p_instanced_surface, bool p_input_motion_vectors) { + Mesh::Surface *s = reinterpret_cast(p_surface); + uint32_t position_stride = 0; + return _mesh_surface_generate_vertex_format(s->format, p_input_mask, p_instanced_surface, p_input_motion_vectors, position_stride); + } + Dependency *mesh_get_dependency(RID p_mesh) const; /* MESH INSTANCE API */ diff --git a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp index 2f44096dc869..ca44f4dd7ed5 100644 --- a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp +++ b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp @@ -167,76 +167,27 @@ void RenderSceneBuffersRD::configure(const RenderSceneBuffersConfiguration *p_co // cleanout any old buffers we had. cleanup(); - // At least one of these is required to be supported. - RenderingDeviceCommons::DataFormat preferred_format[2] = { RD::DATA_FORMAT_D24_UNORM_S8_UINT, RD::DATA_FORMAT_D32_SFLOAT_S8_UINT }; - if (can_be_storage) { - // Prefer higher precision on desktop. 
- preferred_format[0] = RD::DATA_FORMAT_D32_SFLOAT_S8_UINT; - preferred_format[1] = RD::DATA_FORMAT_D24_UNORM_S8_UINT; - } - - // create our 3D render buffers - { - // Create our color buffer(s) - uint32_t usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | (can_be_storage ? RD::TEXTURE_USAGE_STORAGE_BIT : 0) | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; - usage_bits |= RD::TEXTURE_USAGE_INPUT_ATTACHMENT_BIT; // only needed when using subpasses in the mobile renderer - - // our internal texture should have MSAA support if applicable - if (msaa_3d != RS::VIEWPORT_MSAA_DISABLED) { - usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; - } - - create_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR, base_data_format, usage_bits); - } - - // Create our depth buffer - { - // TODO Lazy create this in case we've got an external depth buffer - - RD::DataFormat format; - uint32_t usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT; - - if (msaa_3d == RS::VIEWPORT_MSAA_DISABLED) { - usage_bits |= RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; - format = RD::get_singleton()->texture_is_format_supported_for_usage(preferred_format[0], usage_bits) ? preferred_format[0] : preferred_format[1]; - } else { - format = RD::DATA_FORMAT_R32_SFLOAT; - usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | (can_be_storage ? RD::TEXTURE_USAGE_STORAGE_BIT : 0); - } + // Create our color buffer. + const bool resolve_target = msaa_3d != RS::VIEWPORT_MSAA_DISABLED; + create_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR, base_data_format, get_color_usage_bits(resolve_target, false, can_be_storage)); - create_texture(RB_SCOPE_BUFFERS, RB_TEX_DEPTH, format, usage_bits); - } + // Create our depth buffer. + create_texture(RB_SCOPE_BUFFERS, RB_TEX_DEPTH, get_depth_format(resolve_target, false, can_be_storage), get_depth_usage_bits(resolve_target, false, can_be_storage)); - // Create our MSAA buffers + // Create our MSAA buffers. 
if (msaa_3d == RS::VIEWPORT_MSAA_DISABLED) { texture_samples = RD::TEXTURE_SAMPLES_1; } else { - RD::DataFormat format = base_data_format; - uint32_t usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; - - const RD::TextureSamples ts[RS::VIEWPORT_MSAA_MAX] = { - RD::TEXTURE_SAMPLES_1, - RD::TEXTURE_SAMPLES_2, - RD::TEXTURE_SAMPLES_4, - RD::TEXTURE_SAMPLES_8, - }; - - texture_samples = ts[msaa_3d]; - - create_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR_MSAA, format, usage_bits, texture_samples); - - usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; - format = RD::get_singleton()->texture_is_format_supported_for_usage(preferred_format[0], usage_bits) ? preferred_format[0] : preferred_format[1]; - - create_texture(RB_SCOPE_BUFFERS, RB_TEX_DEPTH_MSAA, format, usage_bits, texture_samples); + texture_samples = msaa_to_samples(msaa_3d); + create_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR_MSAA, base_data_format, get_color_usage_bits(false, true, can_be_storage), texture_samples); + create_texture(RB_SCOPE_BUFFERS, RB_TEX_DEPTH_MSAA, get_depth_format(false, true, can_be_storage), get_depth_usage_bits(false, true, can_be_storage), texture_samples); } // VRS (note, our vrs object will only be set if VRS is supported) RID vrs_texture; RS::ViewportVRSMode vrs_mode = texture_storage->render_target_get_vrs_mode(render_target); if (vrs && vrs_mode != RS::VIEWPORT_VRS_DISABLED) { - uint32_t usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_VRS_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - vrs_texture = create_texture(RB_SCOPE_VRS, RB_TEXTURE, RD::DATA_FORMAT_R8_UINT, usage_bits, RD::TEXTURE_SAMPLES_1, vrs->get_vrs_texture_size(internal_size)); + vrs_texture = create_texture(RB_SCOPE_VRS, RB_TEXTURE, get_vrs_format(), get_vrs_usage_bits(), RD::TEXTURE_SAMPLES_1, 
vrs->get_vrs_texture_size(internal_size)); } // (re-)configure any named buffers @@ -664,16 +615,12 @@ void RenderSceneBuffersRD::ensure_upscaled() { void RenderSceneBuffersRD::ensure_velocity() { if (!has_texture(RB_SCOPE_BUFFERS, RB_TEX_VELOCITY)) { - uint32_t usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - - if (msaa_3d != RS::VIEWPORT_MSAA_DISABLED) { - uint32_t msaa_usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; - usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + const bool msaa = msaa_3d != RS::VIEWPORT_MSAA_DISABLED; + create_texture(RB_SCOPE_BUFFERS, RB_TEX_VELOCITY, get_velocity_format(), get_velocity_usage_bits(msaa, false, can_be_storage)); - create_texture(RB_SCOPE_BUFFERS, RB_TEX_VELOCITY_MSAA, RD::DATA_FORMAT_R16G16_SFLOAT, msaa_usage_bits, texture_samples); + if (msaa) { + create_texture(RB_SCOPE_BUFFERS, RB_TEX_VELOCITY_MSAA, get_velocity_format(), get_velocity_usage_bits(false, msaa, can_be_storage), texture_samples); } - - create_texture(RB_SCOPE_BUFFERS, RB_TEX_VELOCITY, RD::DATA_FORMAT_R16G16_SFLOAT, usage_bits); } } @@ -724,3 +671,62 @@ RID RenderSceneBuffersRD::get_velocity_buffer(bool p_get_msaa, uint32_t p_layer) } } } + +uint32_t RenderSceneBuffersRD::get_color_usage_bits(bool p_resolve, bool p_msaa, bool p_storage) { + DEV_ASSERT((!p_resolve && !p_msaa) || (p_resolve != p_msaa)); + + uint32_t usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_INPUT_ATTACHMENT_BIT; + if (p_msaa) { + usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + } else if (p_resolve) { + usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | (p_storage ? RD::TEXTURE_USAGE_STORAGE_BIT : 0); + } else { + usage_bits |= (p_storage ? 
RD::TEXTURE_USAGE_STORAGE_BIT : 0); + } + + return usage_bits; +} + +RD::DataFormat RenderSceneBuffersRD::get_depth_format(bool p_resolve, bool p_msaa, bool p_storage) { + if (p_resolve) { + return RD::DATA_FORMAT_R32_SFLOAT; + } else { + const RenderingDeviceCommons::DataFormat preferred_formats[2] = { + p_storage ? RD::DATA_FORMAT_D32_SFLOAT_S8_UINT : RD::DATA_FORMAT_D24_UNORM_S8_UINT, + p_storage ? RD::DATA_FORMAT_D24_UNORM_S8_UINT : RD::DATA_FORMAT_D32_SFLOAT_S8_UINT + }; + + return RD::get_singleton()->texture_is_format_supported_for_usage(preferred_formats[0], get_depth_usage_bits(p_resolve, p_msaa, p_storage)) ? preferred_formats[0] : preferred_formats[1]; + } +} + +uint32_t RenderSceneBuffersRD::get_depth_usage_bits(bool p_resolve, bool p_msaa, bool p_storage) { + DEV_ASSERT((!p_resolve && !p_msaa) || (p_resolve != p_msaa)); + + uint32_t usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT; + if (p_msaa) { + usage_bits |= RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + } else if (p_resolve) { + usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | (p_storage ? 
RD::TEXTURE_USAGE_STORAGE_BIT : 0); + } else { + usage_bits |= RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + } + + return usage_bits; +} + +RD::DataFormat RenderSceneBuffersRD::get_velocity_format() { + return RD::DATA_FORMAT_R16G16_SFLOAT; +} + +uint32_t RenderSceneBuffersRD::get_velocity_usage_bits(bool p_resolve, bool p_msaa, bool p_storage) { + return get_color_usage_bits(p_resolve, p_msaa, p_storage); +} + +RD::DataFormat RenderSceneBuffersRD::get_vrs_format() { + return RD::DATA_FORMAT_R8_UINT; +} + +uint32_t RenderSceneBuffersRD::get_vrs_usage_bits() { + return RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_VRS_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; +} diff --git a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h index 0025fc5ab715..187dbab44509 100644 --- a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h +++ b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h @@ -178,6 +178,7 @@ class RenderSceneBuffersRD : public RenderSceneBuffers { // info from our renderer void set_can_be_storage(const bool p_can_be_storage) { can_be_storage = p_can_be_storage; } + bool get_can_be_storage() const { return can_be_storage; } void set_max_cluster_elements(const uint32_t p_max_elements) { max_cluster_elements = p_max_elements; } uint32_t get_max_cluster_elements() { return max_cluster_elements; } void set_base_data_format(const RD::DataFormat p_base_data_format) { base_data_format = p_base_data_format; } @@ -305,6 +306,30 @@ class RenderSceneBuffersRD : public RenderSceneBuffers { return samplers; } + _FORCE_INLINE_ static RD::TextureSamples msaa_to_samples(RS::ViewportMSAA p_msaa) { + switch (p_msaa) { + case RS::VIEWPORT_MSAA_DISABLED: + return RD::TEXTURE_SAMPLES_1; + case RS::VIEWPORT_MSAA_2X: + return RD::TEXTURE_SAMPLES_2; + case RS::VIEWPORT_MSAA_4X: + return RD::TEXTURE_SAMPLES_4; + case 
RS::VIEWPORT_MSAA_8X: + return RD::TEXTURE_SAMPLES_8; + default: + DEV_ASSERT(false && "Unknown MSAA option."); + return RD::TEXTURE_SAMPLES_1; + } + } + + static uint32_t get_color_usage_bits(bool p_resolve, bool p_msaa, bool p_storage); + static RD::DataFormat get_depth_format(bool p_resolve, bool p_msaa, bool p_storage); + static uint32_t get_depth_usage_bits(bool p_resolve, bool p_msaa, bool p_storage); + static RD::DataFormat get_velocity_format(); + static uint32_t get_velocity_usage_bits(bool p_resolve, bool p_msaa, bool p_storage); + static RD::DataFormat get_vrs_format(); + static uint32_t get_vrs_usage_bits(); + private: //////////////////////////////////////////////////////////////////////////////////////////////////////////// // Our classDB doesn't support calling our normal exposed functions diff --git a/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp b/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp index e5a8dbb9b2b8..d6a31f81e166 100644 --- a/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/texture_storage.cpp @@ -185,9 +185,8 @@ TextureStorage::TextureStorage() { ptr[i] = Math::make_half_float(1.0f); } - Vector> vpv; - vpv.push_back(sv); - default_rd_textures[DEFAULT_RD_TEXTURE_DEPTH] = RD::get_singleton()->texture_create(tf, RD::TextureView(), vpv); + default_rd_textures[DEFAULT_RD_TEXTURE_DEPTH] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->texture_update(default_rd_textures[DEFAULT_RD_TEXTURE_DEPTH], 0, sv); } for (int i = 0; i < 16; i++) { @@ -460,9 +459,8 @@ TextureStorage::TextureStorage() { } { - Vector> vsv; - vsv.push_back(sv); - default_rd_textures[DEFAULT_RD_TEXTURE_2D_ARRAY_DEPTH] = RD::get_singleton()->texture_create(tformat, RD::TextureView(), vsv); + default_rd_textures[DEFAULT_RD_TEXTURE_2D_ARRAY_DEPTH] = RD::get_singleton()->texture_create(tformat, RD::TextureView()); + 
RD::get_singleton()->texture_update(default_rd_textures[DEFAULT_RD_TEXTURE_2D_ARRAY_DEPTH], 0, sv); } } @@ -3278,13 +3276,13 @@ void TextureStorage::_update_render_target(RenderTarget *rt) { if (rt->size.width == 0 || rt->size.height == 0) { return; } + + rt->color_format = render_target_get_color_format(rt->use_hdr, false); + rt->color_format_srgb = render_target_get_color_format(rt->use_hdr, true); + if (rt->use_hdr) { - rt->color_format = RendererSceneRenderRD::get_singleton()->_render_buffers_get_color_format(); - rt->color_format_srgb = rt->color_format; rt->image_format = rt->is_transparent ? Image::FORMAT_RGBAH : Image::FORMAT_RGBH; } else { - rt->color_format = RD::DATA_FORMAT_R8G8B8A8_UNORM; - rt->color_format_srgb = RD::DATA_FORMAT_R8G8B8A8_SRGB; rt->image_format = rt->is_transparent ? Image::FORMAT_RGBA8 : Image::FORMAT_RGB8; } @@ -3303,8 +3301,7 @@ void TextureStorage::_update_render_target(RenderTarget *rt) { rd_color_attachment_format.texture_type = RD::TEXTURE_TYPE_2D; } rd_color_attachment_format.samples = RD::TEXTURE_SAMPLES_1; - rd_color_attachment_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; - rd_color_attachment_format.usage_bits |= RD::TEXTURE_USAGE_STORAGE_BIT; // FIXME we need this only when FSR is enabled + rd_color_attachment_format.usage_bits = render_target_get_color_usage_bits(false); rd_color_attachment_format.shareable_formats.push_back(rt->color_format); rd_color_attachment_format.shareable_formats.push_back(rt->color_format_srgb); if (rt->msaa != RS::VIEWPORT_MSAA_DISABLED) { @@ -3326,7 +3323,7 @@ void TextureStorage::_update_render_target(RenderTarget *rt) { RD::TEXTURE_SAMPLES_8, }; rd_color_multisample_format.samples = texture_samples[rt->msaa]; - rd_color_multisample_format.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + rd_color_multisample_format.usage_bits = render_target_get_color_usage_bits(true); RD::TextureView rd_view_multisample; 
rd_color_multisample_format.is_resolve_buffer = false; rt->color_multisample = RD::get_singleton()->texture_create(rd_color_multisample_format, rd_view_multisample); @@ -4216,3 +4213,20 @@ RID TextureStorage::render_target_get_vrs_texture(RID p_render_target) const { return rt->vrs_texture; } + +RD::DataFormat TextureStorage::render_target_get_color_format(bool p_use_hdr, bool p_srgb) { + if (p_use_hdr) { + return RendererSceneRenderRD::get_singleton()->_render_buffers_get_color_format(); + } else { + return p_srgb ? RD::DATA_FORMAT_R8G8B8A8_SRGB : RD::DATA_FORMAT_R8G8B8A8_UNORM; + } +} + +uint32_t TextureStorage::render_target_get_color_usage_bits(bool p_msaa) { + if (p_msaa) { + return RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + } else { + // FIXME: Storage bit should only be requested when FSR is required. + return RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + } +} diff --git a/servers/rendering/renderer_rd/storage_rd/texture_storage.h b/servers/rendering/renderer_rd/storage_rd/texture_storage.h index 9de4ff7b6b91..1c494eeb6865 100644 --- a/servers/rendering/renderer_rd/storage_rd/texture_storage.h +++ b/servers/rendering/renderer_rd/storage_rd/texture_storage.h @@ -777,6 +777,9 @@ class TextureStorage : public RendererTextureStorage { void render_target_set_framebuffer_uniform_set(RID p_render_target, RID p_uniform_set); void render_target_set_backbuffer_uniform_set(RID p_render_target, RID p_uniform_set); + + static RD::DataFormat render_target_get_color_format(bool p_use_hdr, bool p_srgb); + static uint32_t render_target_get_color_usage_bits(bool p_msaa); }; } // namespace RendererRD diff --git a/servers/rendering/renderer_scene_cull.cpp b/servers/rendering/renderer_scene_cull.cpp index 43abb22e3d8e..8b7ec0886853 100644 --- a/servers/rendering/renderer_scene_cull.cpp +++ b/servers/rendering/renderer_scene_cull.cpp @@ -1690,6 +1690,14 @@ Variant 
RendererSceneCull::instance_geometry_get_shader_parameter_default_value( return Variant(); } +void RendererSceneCull::mesh_generate_pipelines(RID p_mesh, bool p_background_compilation) { + scene_render->mesh_generate_pipelines(p_mesh, p_background_compilation); +} + +uint32_t RendererSceneCull::get_pipeline_compilations(RS::PipelineSource p_source) { + return scene_render->get_pipeline_compilations(p_source); +} + void RendererSceneCull::instance_geometry_get_shader_parameter_list(RID p_instance, List *p_parameters) const { const Instance *instance = const_cast(this)->instance_owner.get_or_null(p_instance); ERR_FAIL_NULL(instance); diff --git a/servers/rendering/renderer_scene_cull.h b/servers/rendering/renderer_scene_cull.h index 972f66d325ff..5aae59eb51d3 100644 --- a/servers/rendering/renderer_scene_cull.h +++ b/servers/rendering/renderer_scene_cull.h @@ -1102,6 +1102,9 @@ class RendererSceneCull : public RenderingMethod { virtual Variant instance_geometry_get_shader_parameter(RID p_instance, const StringName &p_parameter) const; virtual Variant instance_geometry_get_shader_parameter_default_value(RID p_instance, const StringName &p_parameter) const; + virtual void mesh_generate_pipelines(RID p_mesh, bool p_background_compilation); + virtual uint32_t get_pipeline_compilations(RS::PipelineSource p_source); + _FORCE_INLINE_ void _update_instance(Instance *p_instance); _FORCE_INLINE_ void _update_instance_aabb(Instance *p_instance); _FORCE_INLINE_ void _update_dirty_instance(Instance *p_instance); diff --git a/servers/rendering/renderer_scene_render.h b/servers/rendering/renderer_scene_render.h index 72ccbcdf1191..4d81a9b6a318 100644 --- a/servers/rendering/renderer_scene_render.h +++ b/servers/rendering/renderer_scene_render.h @@ -58,6 +58,11 @@ class RendererSceneRender { virtual void geometry_instance_free(RenderGeometryInstance *p_geometry_instance) = 0; virtual uint32_t geometry_instance_get_pair_mask() = 0; + /* PIPELINES */ + + virtual void 
mesh_generate_pipelines(RID p_mesh, bool p_background_compilation) = 0; + virtual uint32_t get_pipeline_compilations(RS::PipelineSource p_source) = 0; + /* SDFGI UPDATE */ virtual void sdfgi_update(const Ref &p_render_buffers, RID p_environment, const Vector3 &p_world_position) = 0; diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index e322bba76878..b2875509867b 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -36,7 +36,15 @@ #include "core/config/project_settings.h" #include "core/io/dir_access.h" +// TODO: Thread safety +// - Roll back thread safe attribute for RID_Owner members after the read-only/atomic update scheme is implemented. + #define FORCE_SEPARATE_PRESENT_QUEUE 0 +#define PRINT_FRAMEBUFFER_FORMAT 0 + +#define ERR_RENDER_THREAD_MSG String("This function (") + String(__func__) + String(") can only be called from the render thread. ") +#define ERR_RENDER_THREAD_GUARD() ERR_FAIL_COND_MSG(render_thread_id != Thread::get_caller_id(), ERR_RENDER_THREAD_MSG); +#define ERR_RENDER_THREAD_GUARD_V(m_ret) ERR_FAIL_COND_V_MSG(render_thread_id != Thread::get_caller_id(), (m_ret), ERR_RENDER_THREAD_MSG); /**************************/ /**** HELPER FUNCTIONS ****/ @@ -138,6 +146,8 @@ RenderingDevice::ShaderSPIRVGetCacheKeyFunction RenderingDevice::get_spirv_cache /***************************/ void RenderingDevice::_add_dependency(RID p_id, RID p_depends_on) { + _THREAD_SAFE_METHOD_ + HashSet *set = dependency_map.getptr(p_depends_on); if (set == nullptr) { set = &dependency_map.insert(p_depends_on, HashSet())->value; @@ -152,6 +162,8 @@ void RenderingDevice::_add_dependency(RID p_id, RID p_depends_on) { } void RenderingDevice::_free_dependencies(RID p_id) { + _THREAD_SAFE_METHOD_ + // Direct dependencies must be freed. 
HashMap>::Iterator E = dependency_map.find(p_id); @@ -236,6 +248,35 @@ RenderingDevice::Buffer *RenderingDevice::_get_buffer_from_owner(RID p_buffer) { return buffer; } +Error RenderingDevice::_buffer_initialize(Buffer *p_buffer, const uint8_t *p_data, size_t p_data_size, uint32_t p_required_align) { + uint32_t transfer_worker_offset; + TransferWorker *transfer_worker = _acquire_transfer_worker(p_data_size, p_required_align, transfer_worker_offset); + p_buffer->transfer_worker_index = transfer_worker->index; + + { + MutexLock lock(transfer_worker->operations_mutex); + p_buffer->transfer_worker_operation = ++transfer_worker->operations_counter; + } + + // Copy to the worker's staging buffer. + uint8_t *data_ptr = driver->buffer_map(transfer_worker->staging_buffer); + ERR_FAIL_NULL_V(data_ptr, ERR_CANT_CREATE); + + memcpy(data_ptr + transfer_worker_offset, p_data, p_data_size); + driver->buffer_unmap(transfer_worker->staging_buffer); + + // Copy from the staging buffer to the real buffer. 
+ RDD::BufferCopyRegion region; + region.src_offset = transfer_worker_offset; + region.dst_offset = 0; + region.size = p_data_size; + driver->command_copy_buffer(transfer_worker->command_buffer, transfer_worker->staging_buffer, p_buffer->driver_id, region); + + _release_transfer_worker(transfer_worker); + + return OK; +} + Error RenderingDevice::_insert_staging_block() { StagingBufferBlock block; @@ -386,32 +427,89 @@ void RenderingDevice::_staging_buffer_execute_required_action(StagingRequiredAct } } -Error RenderingDevice::_buffer_update(Buffer *p_buffer, RID p_buffer_id, size_t p_offset, const uint8_t *p_data, size_t p_data_size, bool p_use_draw_queue, uint32_t p_required_align) { +Error RenderingDevice::buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size) { + ERR_RENDER_THREAD_GUARD_V(ERR_UNAVAILABLE); + + ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER, + "Copying buffers is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER, + "Copying buffers is forbidden during creation of a compute list"); + + Buffer *src_buffer = _get_buffer_from_owner(p_src_buffer); + if (!src_buffer) { + ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Source buffer argument is not a valid buffer of any type."); + } + + Buffer *dst_buffer = _get_buffer_from_owner(p_dst_buffer); + if (!dst_buffer) { + ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Destination buffer argument is not a valid buffer of any type."); + } + + // Validate the copy's dimensions for both buffers. + ERR_FAIL_COND_V_MSG((p_size + p_src_offset) > src_buffer->size, ERR_INVALID_PARAMETER, "Size is larger than the source buffer."); + ERR_FAIL_COND_V_MSG((p_size + p_dst_offset) > dst_buffer->size, ERR_INVALID_PARAMETER, "Size is larger than the destination buffer."); + + _check_transfer_worker_buffer(src_buffer); + _check_transfer_worker_buffer(dst_buffer); + + // Perform the copy. 
+ RDD::BufferCopyRegion region; + region.src_offset = p_src_offset; + region.dst_offset = p_dst_offset; + region.size = p_size; + + if (_buffer_make_mutable(dst_buffer, p_dst_buffer)) { + // The destination buffer must be mutable to be used as a copy destination. + draw_graph.add_synchronization(); + } + + draw_graph.add_buffer_copy(src_buffer->driver_id, src_buffer->draw_tracker, dst_buffer->driver_id, dst_buffer->draw_tracker, region); + + return OK; +} + +Error RenderingDevice::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data) { + ERR_RENDER_THREAD_GUARD_V(ERR_UNAVAILABLE); + + copy_bytes_count += p_size; + + ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER, + "Updating buffers is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER, + "Updating buffers is forbidden during creation of a compute list"); + + Buffer *buffer = _get_buffer_from_owner(p_buffer); + ERR_FAIL_NULL_V_MSG(buffer, ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type."); + ERR_FAIL_COND_V_MSG(p_offset + p_size > buffer->size, ERR_INVALID_PARAMETER, "Attempted to write buffer (" + itos((p_offset + p_size) - buffer->size) + " bytes) past the end."); + + _check_transfer_worker_buffer(buffer); + // Submitting may get chunked for various reasons, so convert this to a task. 
- size_t to_submit = p_data_size; + size_t to_submit = p_size; size_t submit_from = 0; thread_local LocalVector command_buffer_copies_vector; command_buffer_copies_vector.clear(); + const uint8_t *src_data = reinterpret_cast(p_data); + const uint32_t required_align = 32; while (to_submit > 0) { uint32_t block_write_offset; uint32_t block_write_amount; StagingRequiredAction required_action; - Error err = _staging_buffer_allocate(MIN(to_submit, staging_buffer_block_size), p_required_align, block_write_offset, block_write_amount, required_action); + Error err = _staging_buffer_allocate(MIN(to_submit, staging_buffer_block_size), required_align, block_write_offset, block_write_amount, required_action); if (err) { return err; } - if (p_use_draw_queue && !command_buffer_copies_vector.is_empty() && required_action == STAGING_REQUIRED_ACTION_FLUSH_AND_STALL_ALL) { - if (_buffer_make_mutable(p_buffer, p_buffer_id)) { + if (!command_buffer_copies_vector.is_empty() && required_action == STAGING_REQUIRED_ACTION_FLUSH_AND_STALL_ALL) { + if (_buffer_make_mutable(buffer, p_buffer)) { // The buffer must be mutable to be used as a copy destination. draw_graph.add_synchronization(); } - // If we're using the draw queue and the staging buffer requires flushing everything, we submit the command early and clear the current vector. - draw_graph.add_buffer_update(p_buffer->driver_id, p_buffer->draw_tracker, command_buffer_copies_vector); + draw_graph.add_buffer_update(buffer->driver_id, buffer->draw_tracker, command_buffer_copies_vector); command_buffer_copies_vector.clear(); } @@ -422,7 +520,7 @@ Error RenderingDevice::_buffer_update(Buffer *p_buffer, RID p_buffer_id, size_t ERR_FAIL_NULL_V(data_ptr, ERR_CANT_CREATE); // Copy to staging buffer. - memcpy(data_ptr + block_write_offset, p_data + submit_from, block_write_amount); + memcpy(data_ptr + block_write_offset, src_data + submit_from, block_write_amount); // Unmap. 
driver->buffer_unmap(staging_buffer_blocks[staging_buffer_current].driver_id); @@ -433,14 +531,10 @@ Error RenderingDevice::_buffer_update(Buffer *p_buffer, RID p_buffer_id, size_t region.dst_offset = submit_from + p_offset; region.size = block_write_amount; - if (p_use_draw_queue) { - RDG::RecordedBufferCopy buffer_copy; - buffer_copy.source = staging_buffer_blocks[staging_buffer_current].driver_id; - buffer_copy.region = region; - command_buffer_copies_vector.push_back(buffer_copy); - } else { - driver->command_copy_buffer(frames[frame].setup_command_buffer, staging_buffer_blocks[staging_buffer_current].driver_id, p_buffer->driver_id, region); - } + RDG::RecordedBufferCopy buffer_copy; + buffer_copy.source = staging_buffer_blocks[staging_buffer_current].driver_id; + buffer_copy.region = region; + command_buffer_copies_vector.push_back(buffer_copy); staging_buffer_blocks.write[staging_buffer_current].fill_amount = block_write_offset + block_write_amount; @@ -448,77 +542,18 @@ Error RenderingDevice::_buffer_update(Buffer *p_buffer, RID p_buffer_id, size_t submit_from += block_write_amount; } - if (p_use_draw_queue && !command_buffer_copies_vector.is_empty()) { - if (_buffer_make_mutable(p_buffer, p_buffer_id)) { + if (!command_buffer_copies_vector.is_empty()) { + if (_buffer_make_mutable(buffer, p_buffer)) { // The buffer must be mutable to be used as a copy destination. 
draw_graph.add_synchronization(); } - draw_graph.add_buffer_update(p_buffer->driver_id, p_buffer->draw_tracker, command_buffer_copies_vector); - } - - return OK; -} - -Error RenderingDevice::buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size) { - _THREAD_SAFE_METHOD_ - - ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER, - "Copying buffers is forbidden during creation of a draw list"); - ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER, - "Copying buffers is forbidden during creation of a compute list"); - - Buffer *src_buffer = _get_buffer_from_owner(p_src_buffer); - if (!src_buffer) { - ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Source buffer argument is not a valid buffer of any type."); - } - - Buffer *dst_buffer = _get_buffer_from_owner(p_dst_buffer); - if (!dst_buffer) { - ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Destination buffer argument is not a valid buffer of any type."); - } - - // Validate the copy's dimensions for both buffers. - ERR_FAIL_COND_V_MSG((p_size + p_src_offset) > src_buffer->size, ERR_INVALID_PARAMETER, "Size is larger than the source buffer."); - ERR_FAIL_COND_V_MSG((p_size + p_dst_offset) > dst_buffer->size, ERR_INVALID_PARAMETER, "Size is larger than the destination buffer."); - - // Perform the copy. - RDD::BufferCopyRegion region; - region.src_offset = p_src_offset; - region.dst_offset = p_dst_offset; - region.size = p_size; - - if (_buffer_make_mutable(dst_buffer, p_dst_buffer)) { - // The destination buffer must be mutable to be used as a copy destination. 
- draw_graph.add_synchronization(); + draw_graph.add_buffer_update(buffer->driver_id, buffer->draw_tracker, command_buffer_copies_vector); } - draw_graph.add_buffer_copy(src_buffer->driver_id, src_buffer->draw_tracker, dst_buffer->driver_id, dst_buffer->draw_tracker, region); - - return OK; -} - -Error RenderingDevice::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data) { - _THREAD_SAFE_METHOD_ - - copy_bytes_count += p_size; - - ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER, - "Updating buffers is forbidden during creation of a draw list"); - ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER, - "Updating buffers is forbidden during creation of a compute list"); - - Buffer *buffer = _get_buffer_from_owner(p_buffer); - if (!buffer) { - ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type."); - } - - ERR_FAIL_COND_V_MSG(p_offset + p_size > buffer->size, ERR_INVALID_PARAMETER, - "Attempted to write buffer (" + itos((p_offset + p_size) - buffer->size) + " bytes) past the end."); - gpu_copy_count++; - return _buffer_update(buffer, p_buffer, p_offset, (uint8_t *)p_data, p_size, true); + return OK; } String RenderingDevice::get_perf_report() const { @@ -534,7 +569,7 @@ void RenderingDevice::update_perf_report() { } Error RenderingDevice::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size) { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD_V(ERR_UNAVAILABLE); ERR_FAIL_COND_V_MSG((p_size % 4) != 0, ERR_INVALID_PARAMETER, "Size must be a multiple of four"); @@ -551,6 +586,8 @@ Error RenderingDevice::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_ ERR_FAIL_COND_V_MSG(p_offset + p_size > buffer->size, ERR_INVALID_PARAMETER, "Attempted to write buffer (" + itos((p_offset + p_size) - buffer->size) + " bytes) past the end."); + _check_transfer_worker_buffer(buffer); + if (_buffer_make_mutable(buffer, p_buffer)) { // The destination buffer must be mutable to be used as a clear 
destination. draw_graph.add_synchronization(); @@ -562,7 +599,7 @@ Error RenderingDevice::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_ } Vector RenderingDevice::buffer_get_data(RID p_buffer, uint32_t p_offset, uint32_t p_size) { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD_V(Vector()); Buffer *buffer = _get_buffer_from_owner(p_buffer); if (!buffer) { @@ -577,6 +614,8 @@ Vector RenderingDevice::buffer_get_data(RID p_buffer, uint32_t p_offset "Size is larger than the buffer."); } + _check_transfer_worker_buffer(buffer); + RDD::BufferID tmp_buffer = driver->buffer_create(buffer->size, RDD::BUFFER_USAGE_TRANSFER_TO_BIT, RDD::MEMORY_ALLOCATION_TYPE_CPU); ERR_FAIL_COND_V(!tmp_buffer, Vector()); @@ -607,8 +646,6 @@ Vector RenderingDevice::buffer_get_data(RID p_buffer, uint32_t p_offset } RID RenderingDevice::storage_buffer_create(uint32_t p_size_bytes, const Vector &p_data, BitField p_usage) { - _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID()); Buffer buffer; @@ -625,10 +662,12 @@ RID RenderingDevice::storage_buffer_create(uint32_t p_size_bytes, const Vectorbuffer_driver_id = buffer.driver_id; if (p_data.size()) { - _buffer_update(&buffer, RID(), 0, p_data.ptr(), p_data.size()); + _buffer_initialize(&buffer, p_data.ptr(), p_data.size()); } + _THREAD_SAFE_LOCK_ buffer_memory += buffer.size; + _THREAD_SAFE_UNLOCK_ RID id = storage_buffer_owner.make_rid(buffer); #ifdef DEV_ENABLED @@ -638,8 +677,6 @@ RID RenderingDevice::storage_buffer_create(uint32_t p_size_bytes, const Vector &p_data) { - _THREAD_SAFE_METHOD_ - uint32_t element_size = get_format_vertex_size(p_format); ERR_FAIL_COND_V_MSG(element_size == 0, RID(), "Format requested is not supported for texture buffers"); uint64_t size_bytes = uint64_t(element_size) * p_size_elements; @@ -665,10 +702,12 @@ RID RenderingDevice::texture_buffer_create(uint32_t p_size_elements, DataFormat } if (p_data.size()) { - _buffer_update(&texture_buffer, RID(), 0, 
p_data.ptr(), p_data.size()); + _buffer_initialize(&texture_buffer, p_data.ptr(), p_data.size()); } + _THREAD_SAFE_LOCK_ buffer_memory += size_bytes; + _THREAD_SAFE_UNLOCK_ RID id = texture_buffer_owner.make_rid(texture_buffer); #ifdef DEV_ENABLED @@ -682,8 +721,6 @@ RID RenderingDevice::texture_buffer_create(uint32_t p_size_elements, DataFormat /*****************/ RID RenderingDevice::texture_create(const TextureFormat &p_format, const TextureView &p_view, const Vector> &p_data) { - _THREAD_SAFE_METHOD_ - // Some adjustments will happen. TextureFormat format = p_format; @@ -740,6 +777,9 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture "Data for slice index " + itos(i) + " (mapped to layer " + itos(i) + ") differs in size (supplied: " + itos(p_data[i].size()) + ") than what is required by the format (" + itos(required_size) + ")."); } + ERR_FAIL_COND_V_MSG(format.usage_bits & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, RID(), + "Textures created as depth attachments can't be initialized with data directly. 
Use RenderingDevice::texture_update() instead."); + if (!(format.usage_bits & TEXTURE_USAGE_CAN_UPDATE_BIT)) { forced_usage_bits = TEXTURE_USAGE_CAN_UPDATE_BIT; } @@ -839,7 +879,7 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture if (p_data.size()) { for (uint32_t i = 0; i < p_format.array_layers; i++) { - _texture_update(id, i, p_data[i], true, false); + _texture_initialize(id, i, p_data[i]); } if (texture.draw_tracker != nullptr) { @@ -852,8 +892,6 @@ RID RenderingDevice::texture_create(const TextureFormat &p_format, const Texture } RID RenderingDevice::texture_create_shared(const TextureView &p_view, RID p_with_texture) { - _THREAD_SAFE_METHOD_ - Texture *src_texture = texture_owner.get_or_null(p_with_texture); ERR_FAIL_NULL_V(src_texture, RID()); @@ -939,7 +977,6 @@ RID RenderingDevice::texture_create_shared(const TextureView &p_view, RID p_with } RID RenderingDevice::texture_create_from_extension(TextureType p_type, DataFormat p_format, TextureSamples p_samples, BitField p_usage, uint64_t p_image, uint64_t p_width, uint64_t p_height, uint64_t p_depth, uint64_t p_layers) { - _THREAD_SAFE_METHOD_ // This method creates a texture object using a VkImage created by an extension, module or other external source (OpenXR uses this). 
Texture texture; @@ -982,8 +1019,6 @@ RID RenderingDevice::texture_create_from_extension(TextureType p_type, DataForma } RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view, RID p_with_texture, uint32_t p_layer, uint32_t p_mipmap, uint32_t p_mipmaps, TextureSliceType p_slice_type, uint32_t p_layers) { - _THREAD_SAFE_METHOD_ - Texture *src_texture = texture_owner.get_or_null(p_with_texture); ERR_FAIL_NULL_V(src_texture, RID()); @@ -1124,10 +1159,6 @@ RID RenderingDevice::texture_create_shared_from_slice(const TextureView &p_view, return id; } -Error RenderingDevice::texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data) { - return _texture_update(p_texture, p_layer, p_data, false, true); -} - static _ALWAYS_INLINE_ void _copy_region(uint8_t const *__restrict p_src, uint8_t *__restrict p_dst, uint32_t p_src_x, uint32_t p_src_y, uint32_t p_src_w, uint32_t p_src_h, uint32_t p_src_full_w, uint32_t p_dst_pitch, uint32_t p_unit_size) { uint32_t src_offset = (p_src_y * p_src_full_w + p_src_x) * p_unit_size; uint32_t dst_offset = 0; @@ -1144,11 +1175,184 @@ static _ALWAYS_INLINE_ void _copy_region(uint8_t const *__restrict p_src, uint8_ } } -Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, bool p_use_setup_queue, bool p_validate_can_update) { - _THREAD_SAFE_METHOD_ +static _ALWAYS_INLINE_ void _copy_region_block_or_regular(const uint8_t *p_read_ptr, uint8_t *p_write_ptr, uint32_t p_x, uint32_t p_y, uint32_t p_width, uint32_t p_region_w, uint32_t p_region_h, uint32_t p_block_w, uint32_t p_block_h, uint32_t p_dst_pitch, uint32_t p_pixel_size, uint32_t p_block_size) { + if (p_block_w != 1 || p_block_h != 1) { + // Block format. 
+ uint32_t xb = p_x / p_block_w; + uint32_t yb = p_y / p_block_h; + uint32_t wb = p_width / p_block_w; + uint32_t region_wb = p_region_w / p_block_w; + uint32_t region_hb = p_region_h / p_block_h; + _copy_region(p_read_ptr, p_write_ptr, xb, yb, region_wb, region_hb, wb, p_dst_pitch, p_block_size); + } else { + // Regular format. + _copy_region(p_read_ptr, p_write_ptr, p_x, p_y, p_region_w, p_region_h, p_width, p_dst_pitch, p_pixel_size); + } +} - ERR_FAIL_COND_V_MSG((draw_list || compute_list) && !p_use_setup_queue, ERR_INVALID_PARAMETER, - "Updating textures is forbidden during creation of a draw or compute list"); +uint32_t RenderingDevice::_texture_layer_count(Texture *p_texture) const { + switch (p_texture->type) { + case TEXTURE_TYPE_CUBE: + case TEXTURE_TYPE_CUBE_ARRAY: + return p_texture->layers * 6; + default: + return p_texture->layers; + } +} + +uint32_t RenderingDevice::_texture_alignment(Texture *p_texture) const { + uint32_t alignment = get_compressed_image_format_block_byte_size(p_texture->format); + if (alignment == 1) { + alignment = get_image_format_pixel_size(p_texture->format); + } + + return STEPIFY(alignment, driver->api_trait_get(RDD::API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT)); +} + +Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, const Vector &p_data) { + Texture *texture = texture_owner.get_or_null(p_texture); + ERR_FAIL_NULL_V(texture, ERR_INVALID_PARAMETER); + + if (texture->owner != RID()) { + p_texture = texture->owner; + texture = texture_owner.get_or_null(texture->owner); + ERR_FAIL_NULL_V(texture, ERR_BUG); // This is a bug. 
+ } + + uint32_t layer_count = _texture_layer_count(texture); + ERR_FAIL_COND_V(p_layer >= layer_count, ERR_INVALID_PARAMETER); + + uint32_t width, height; + uint32_t tight_mip_size = get_image_format_required_size(texture->format, texture->width, texture->height, texture->depth, texture->mipmaps, &width, &height); + uint32_t required_size = tight_mip_size; + uint32_t required_align = _texture_alignment(texture); + + ERR_FAIL_COND_V_MSG(required_size != (uint32_t)p_data.size(), ERR_INVALID_PARAMETER, + "Required size for texture update (" + itos(required_size) + ") does not match data supplied size (" + itos(p_data.size()) + ")."); + + uint32_t block_w, block_h; + get_compressed_image_format_block_dimensions(texture->format, block_w, block_h); + + uint32_t pixel_size = get_image_format_pixel_size(texture->format); + uint32_t pixel_rshift = get_compressed_image_format_pixel_rshift(texture->format); + uint32_t block_size = get_compressed_image_format_block_byte_size(texture->format); + + // The algorithm operates on two passes, one to figure out the total size the staging buffer will require to allocate and another one where the copy is actually performed. 
+ uint32_t staging_worker_offset = 0; + uint32_t staging_local_offset = 0; + TransferWorker *transfer_worker = nullptr; + const uint8_t *read_ptr = p_data.ptr(); + uint8_t *write_ptr = nullptr; + for (uint32_t pass = 0; pass < 2; pass++) { + const bool copy_pass = (pass == 1); + if (copy_pass) { + transfer_worker = _acquire_transfer_worker(staging_local_offset, required_align, staging_worker_offset); + texture->transfer_worker_index = transfer_worker->index; + + { + MutexLock lock(transfer_worker->operations_mutex); + texture->transfer_worker_operation = ++transfer_worker->operations_counter; + } + + staging_local_offset = 0; + + write_ptr = driver->buffer_map(transfer_worker->staging_buffer); + ERR_FAIL_NULL_V(write_ptr, ERR_CANT_CREATE); + + write_ptr += staging_worker_offset; + + if (driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { + // Transition the texture to the optimal layout. + RDD::TextureBarrier tb; + tb.texture = texture->driver_id; + tb.dst_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT; + tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED; + tb.next_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL; + tb.subresources.aspect = texture->barrier_aspect_flags; + tb.subresources.mipmap_count = texture->mipmaps; + tb.subresources.base_layer = p_layer; + tb.subresources.layer_count = 1; + driver->command_pipeline_barrier(transfer_worker->command_buffer, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, RDD::PIPELINE_STAGE_COPY_BIT, {}, {}, tb); + } + } + + uint32_t mipmap_offset = 0; + uint32_t logic_width = texture->width; + uint32_t logic_height = texture->height; + for (uint32_t mm_i = 0; mm_i < texture->mipmaps; mm_i++) { + uint32_t depth = 0; + uint32_t image_total = get_image_format_required_size(texture->format, texture->width, texture->height, texture->depth, mm_i + 1, &width, &height, &depth); + + const uint8_t *read_ptr_mipmap = read_ptr + mipmap_offset; + tight_mip_size = image_total - mipmap_offset; + + for (uint32_t z = 0; z < depth; z++) { + if 
(required_align > 0) { + uint32_t align_offset = staging_local_offset % required_align; + if (align_offset != 0) { + staging_local_offset += required_align - align_offset; + } + } + + uint32_t pitch = (width * pixel_size * block_w) >> pixel_rshift; + uint32_t pitch_step = driver->api_trait_get(RDD::API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP); + pitch = STEPIFY(pitch, pitch_step); + uint32_t to_allocate = pitch * height; + to_allocate >>= pixel_rshift; + + if (copy_pass) { + const uint8_t *read_ptr_mipmap_layer = read_ptr_mipmap + (tight_mip_size / depth) * z; + _copy_region_block_or_regular(read_ptr_mipmap_layer, write_ptr, 0, 0, width, width, height, block_w, block_h, pitch, pixel_size, block_size); + write_ptr += to_allocate; + + RDD::BufferTextureCopyRegion copy_region; + copy_region.buffer_offset = staging_worker_offset + staging_local_offset; + copy_region.texture_subresources.aspect = texture->read_aspect_flags; + copy_region.texture_subresources.mipmap = mm_i; + copy_region.texture_subresources.base_layer = p_layer; + copy_region.texture_subresources.layer_count = 1; + copy_region.texture_offset = Vector3i(0, 0, z); + copy_region.texture_region_size = Vector3i(logic_width, logic_height, 1); + driver->command_copy_buffer_to_texture(transfer_worker->command_buffer, transfer_worker->staging_buffer, texture->driver_id, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, copy_region); + } + + staging_local_offset += to_allocate; + } + + mipmap_offset = image_total; + logic_width = MAX(1u, logic_width >> 1); + logic_height = MAX(1u, logic_height >> 1); + } + + if (copy_pass) { + driver->buffer_unmap(transfer_worker->staging_buffer); + + // If the texture does not have a tracker, it means it must be transitioned to the sampling state. 
+ if (texture->draw_tracker == nullptr && driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { + RDD::TextureBarrier tb; + tb.texture = texture->driver_id; + tb.src_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT; + tb.prev_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL; + tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + tb.subresources.aspect = texture->barrier_aspect_flags; + tb.subresources.mipmap_count = texture->mipmaps; + tb.subresources.base_layer = p_layer; + tb.subresources.layer_count = 1; + + driver->command_pipeline_barrier(transfer_worker->command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb); + } + + _release_transfer_worker(transfer_worker); + } + } + + return OK; +} + +Error RenderingDevice::texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data) { + ERR_RENDER_THREAD_GUARD_V(ERR_UNAVAILABLE); + + ERR_FAIL_COND_V_MSG(draw_list || compute_list, ERR_INVALID_PARAMETER, "Updating textures is forbidden during creation of a draw or compute list"); Texture *texture = texture_owner.get_or_null(p_texture); ERR_FAIL_NULL_V(texture, ERR_INVALID_PARAMETER); @@ -1162,47 +1366,37 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve ERR_FAIL_COND_V_MSG(texture->bound, ERR_CANT_ACQUIRE_RESOURCE, "Texture can't be updated while a draw list that uses it as part of a framebuffer is being created. 
Ensure the draw list is finalized (and that the color/depth texture using it is not set to `RenderingDevice.FINAL_ACTION_CONTINUE`) to update this texture."); - ERR_FAIL_COND_V_MSG(p_validate_can_update && !(texture->usage_flags & TEXTURE_USAGE_CAN_UPDATE_BIT), ERR_INVALID_PARAMETER, - "Texture requires the `RenderingDevice.TEXTURE_USAGE_CAN_UPDATE_BIT` to be set to be updatable."); + ERR_FAIL_COND_V_MSG(!(texture->usage_flags & TEXTURE_USAGE_CAN_UPDATE_BIT), ERR_INVALID_PARAMETER, "Texture requires the `RenderingDevice.TEXTURE_USAGE_CAN_UPDATE_BIT` to be set to be updatable."); - ERR_FAIL_COND_V(p_layer >= texture->layers, ERR_INVALID_PARAMETER); + uint32_t layer_count = _texture_layer_count(texture); + ERR_FAIL_COND_V(p_layer >= layer_count, ERR_INVALID_PARAMETER); uint32_t width, height; uint32_t tight_mip_size = get_image_format_required_size(texture->format, texture->width, texture->height, texture->depth, texture->mipmaps, &width, &height); uint32_t required_size = tight_mip_size; - uint32_t required_align = get_compressed_image_format_block_byte_size(texture->format); - if (required_align == 1) { - required_align = get_image_format_pixel_size(texture->format); - } - required_align = STEPIFY(required_align, driver->api_trait_get(RDD::API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT)); + uint32_t required_align = _texture_alignment(texture); ERR_FAIL_COND_V_MSG(required_size != (uint32_t)p_data.size(), ERR_INVALID_PARAMETER, "Required size for texture update (" + itos(required_size) + ") does not match data supplied size (" + itos(p_data.size()) + ")."); + _check_transfer_worker_texture(texture); + + uint32_t block_w, block_h; + get_compressed_image_format_block_dimensions(texture->format, block_w, block_h); + + uint32_t pixel_size = get_image_format_pixel_size(texture->format); + uint32_t pixel_rshift = get_compressed_image_format_pixel_rshift(texture->format); + uint32_t block_size = get_compressed_image_format_block_byte_size(texture->format); + uint32_t region_size = 
texture_upload_region_size_px; - const uint8_t *r = p_data.ptr(); + const uint8_t *read_ptr = p_data.ptr(); thread_local LocalVector command_buffer_to_texture_copies_vector; command_buffer_to_texture_copies_vector.clear(); - if (p_use_setup_queue && driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - // When using the setup queue directly, we transition the texture to the optimal layout. - RDD::TextureBarrier tb; - tb.texture = texture->driver_id; - tb.dst_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT; - tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED; - tb.next_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL; - tb.subresources.aspect = texture->barrier_aspect_flags; - tb.subresources.mipmap_count = texture->mipmaps; - tb.subresources.base_layer = p_layer; - tb.subresources.layer_count = 1; - - driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, RDD::PIPELINE_STAGE_COPY_BIT, {}, {}, tb); - } else if (!p_use_setup_queue) { - // Indicate the texture will get modified for the shared texture fallback. - _texture_update_shared_fallback(p_texture, texture, true); - } + // Indicate the texture will get modified for the shared texture fallback. + _texture_update_shared_fallback(p_texture, texture, true); uint32_t mipmap_offset = 0; @@ -1213,13 +1407,11 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve uint32_t depth = 0; uint32_t image_total = get_image_format_required_size(texture->format, texture->width, texture->height, texture->depth, mm_i + 1, &width, &height, &depth); - const uint8_t *read_ptr_mipmap = r + mipmap_offset; + const uint8_t *read_ptr_mipmap = read_ptr + mipmap_offset; tight_mip_size = image_total - mipmap_offset; - for (uint32_t z = 0; z < depth; z++) { // For 3D textures, depth may be > 0. 
- - const uint8_t *read_ptr = read_ptr_mipmap + (tight_mip_size / depth) * z; - + for (uint32_t z = 0; z < depth; z++) { + const uint8_t *read_ptr_mipmap_layer = read_ptr_mipmap + (tight_mip_size / depth) * z; for (uint32_t y = 0; y < height; y += region_size) { for (uint32_t x = 0; x < width; x += region_size) { uint32_t region_w = MIN(region_size, width - x); @@ -1228,11 +1420,7 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve uint32_t region_logic_w = MIN(region_size, logic_width - x); uint32_t region_logic_h = MIN(region_size, logic_height - y); - uint32_t pixel_size = get_image_format_pixel_size(texture->format); - uint32_t block_w = 0, block_h = 0; - get_compressed_image_format_block_dimensions(texture->format, block_w, block_h); - - uint32_t region_pitch = (region_w * pixel_size * block_w) >> get_compressed_image_format_pixel_rshift(texture->format); + uint32_t region_pitch = (region_w * pixel_size * block_w) >> pixel_rshift; uint32_t pitch_step = driver->api_trait_get(RDD::API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP); region_pitch = STEPIFY(region_pitch, pitch_step); uint32_t to_allocate = region_pitch * region_h; @@ -1241,13 +1429,13 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve Error err = _staging_buffer_allocate(to_allocate, required_align, alloc_offset, alloc_size, required_action, false); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - if (!p_use_setup_queue && !command_buffer_to_texture_copies_vector.is_empty() && required_action == STAGING_REQUIRED_ACTION_FLUSH_AND_STALL_ALL) { + if (!command_buffer_to_texture_copies_vector.is_empty() && required_action == STAGING_REQUIRED_ACTION_FLUSH_AND_STALL_ALL) { if (_texture_make_mutable(texture, p_texture)) { // The texture must be mutable to be used as a copy destination. 
draw_graph.add_synchronization(); } - // If we're using the draw queue and the staging buffer requires flushing everything, we submit the command early and clear the current vector. + // If the staging buffer requires flushing everything, we submit the command early and clear the current vector. draw_graph.add_texture_update(texture->driver_id, texture->draw_tracker, command_buffer_to_texture_copies_vector); command_buffer_to_texture_copies_vector.clear(); } @@ -1266,24 +1454,7 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve ERR_FAIL_COND_V(region_w % block_w, ERR_BUG); ERR_FAIL_COND_V(region_h % block_h, ERR_BUG); - if (block_w != 1 || block_h != 1) { - // Compressed image (blocks). - // Must copy a block region. - - uint32_t block_size = get_compressed_image_format_block_byte_size(texture->format); - // Re-create current variables in blocky format. - uint32_t xb = x / block_w; - uint32_t yb = y / block_h; - uint32_t wb = width / block_w; - //uint32_t hb = height / block_h; - uint32_t region_wb = region_w / block_w; - uint32_t region_hb = region_h / block_h; - _copy_region(read_ptr, write_ptr, xb, yb, region_wb, region_hb, wb, region_pitch, block_size); - } else { - // Regular image (pixels). - // Must copy a pixel region. - _copy_region(read_ptr, write_ptr, x, y, region_w, region_h, width, region_pitch, pixel_size); - } + _copy_region_block_or_regular(read_ptr_mipmap_layer, write_ptr, x, y, width, region_w, region_h, block_w, block_h, region_pitch, pixel_size, block_size); { // Unmap. 
driver->buffer_unmap(staging_buffer_blocks[staging_buffer_current].driver_id); @@ -1298,14 +1469,10 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve copy_region.texture_offset = Vector3i(x, y, z); copy_region.texture_region_size = Vector3i(region_logic_w, region_logic_h, 1); - if (p_use_setup_queue) { - driver->command_copy_buffer_to_texture(frames[frame].setup_command_buffer, staging_buffer_blocks[staging_buffer_current].driver_id, texture->driver_id, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, copy_region); - } else { - RDG::RecordedBufferToTextureCopy buffer_to_texture_copy; - buffer_to_texture_copy.from_buffer = staging_buffer_blocks[staging_buffer_current].driver_id; - buffer_to_texture_copy.region = copy_region; - command_buffer_to_texture_copies_vector.push_back(buffer_to_texture_copy); - } + RDG::RecordedBufferToTextureCopy buffer_to_texture_copy; + buffer_to_texture_copy.from_buffer = staging_buffer_blocks[staging_buffer_current].driver_id; + buffer_to_texture_copy.region = copy_region; + command_buffer_to_texture_copies_vector.push_back(buffer_to_texture_copy); staging_buffer_blocks.write[staging_buffer_current].fill_amount = alloc_offset + alloc_size; } @@ -1317,27 +1484,13 @@ Error RenderingDevice::_texture_update(RID p_texture, uint32_t p_layer, const Ve logic_height = MAX(1u, logic_height >> 1); } - if (p_use_setup_queue && (texture->draw_tracker == nullptr) && driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { - // If the texture does not have a tracker, it means it must be transitioned to the sampling state. 
- RDD::TextureBarrier tb; - tb.texture = texture->driver_id; - tb.src_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT; - tb.prev_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL; - tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - tb.subresources.aspect = texture->barrier_aspect_flags; - tb.subresources.mipmap_count = texture->mipmaps; - tb.subresources.base_layer = p_layer; - tb.subresources.layer_count = 1; - driver->command_pipeline_barrier(frames[frame].setup_command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb); - } else if (!p_use_setup_queue && !command_buffer_to_texture_copies_vector.is_empty()) { - if (_texture_make_mutable(texture, p_texture)) { - // The texture must be mutable to be used as a copy destination. - draw_graph.add_synchronization(); - } - - draw_graph.add_texture_update(texture->driver_id, texture->draw_tracker, command_buffer_to_texture_copies_vector); + if (_texture_make_mutable(texture, p_texture)) { + // The texture must be mutable to be used as a copy destination. + draw_graph.add_synchronization(); } + draw_graph.add_texture_update(texture->driver_id, texture->draw_tracker, command_buffer_to_texture_copies_vector); + return OK; } @@ -1587,7 +1740,7 @@ Vector RenderingDevice::_texture_get_data(Texture *tex, uint32_t p_laye } Vector RenderingDevice::texture_get_data(RID p_texture, uint32_t p_layer) { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD_V(Vector()); Texture *tex = texture_owner.get_or_null(p_texture); ERR_FAIL_NULL_V(tex, Vector()); @@ -1599,7 +1752,9 @@ Vector RenderingDevice::texture_get_data(RID p_texture, uint32_t p_laye ERR_FAIL_COND_V(p_layer >= tex->layers, Vector()); - if ((tex->usage_flags & TEXTURE_USAGE_CPU_READ_BIT)) { + _check_transfer_worker_texture(tex); + + if (tex->usage_flags & TEXTURE_USAGE_CPU_READ_BIT) { // Does not need anything fancy, map and read. 
return _texture_get_data(tex, p_layer); } else { @@ -1701,7 +1856,7 @@ Vector RenderingDevice::texture_get_data(RID p_texture, uint32_t p_laye } bool RenderingDevice::texture_is_shared(RID p_texture) { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD_V(false); Texture *tex = texture_owner.get_or_null(p_texture); ERR_FAIL_NULL_V(tex, false); @@ -1709,11 +1864,13 @@ bool RenderingDevice::texture_is_shared(RID p_texture) { } bool RenderingDevice::texture_is_valid(RID p_texture) { + ERR_RENDER_THREAD_GUARD_V(false); + return texture_owner.owns(p_texture); } RD::TextureFormat RenderingDevice::texture_get_format(RID p_texture) { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD_V(TextureFormat()); Texture *tex = texture_owner.get_or_null(p_texture); ERR_FAIL_NULL_V(tex, TextureFormat()); @@ -1736,7 +1893,7 @@ RD::TextureFormat RenderingDevice::texture_get_format(RID p_texture) { } Size2i RenderingDevice::texture_size(RID p_texture) { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD_V(Size2i()); Texture *tex = texture_owner.get_or_null(p_texture); ERR_FAIL_NULL_V(tex, Size2i()); @@ -1750,7 +1907,7 @@ uint64_t RenderingDevice::texture_get_native_handle(RID p_texture) { #endif Error RenderingDevice::texture_copy(RID p_from_texture, RID p_to_texture, const Vector3 &p_from, const Vector3 &p_to, const Vector3 &p_size, uint32_t p_src_mipmap, uint32_t p_dst_mipmap, uint32_t p_src_layer, uint32_t p_dst_layer) { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD_V(ERR_UNAVAILABLE); Texture *src_tex = texture_owner.get_or_null(p_from_texture); ERR_FAIL_NULL_V(src_tex, ERR_INVALID_PARAMETER); @@ -1789,6 +1946,9 @@ Error RenderingDevice::texture_copy(RID p_from_texture, RID p_to_texture, const ERR_FAIL_COND_V_MSG(src_tex->read_aspect_flags != dst_tex->read_aspect_flags, ERR_INVALID_PARAMETER, "Source and destination texture must be of the same type (color or depth)."); + _check_transfer_worker_texture(src_tex); + _check_transfer_worker_texture(dst_tex); + RDD::TextureCopyRegion 
copy_region; copy_region.src_subresources.aspect = src_tex->read_aspect_flags; copy_region.src_subresources.mipmap = p_src_mipmap; @@ -1820,7 +1980,7 @@ Error RenderingDevice::texture_copy(RID p_from_texture, RID p_to_texture, const } Error RenderingDevice::texture_resolve_multisample(RID p_from_texture, RID p_to_texture) { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD_V(ERR_UNAVAILABLE); Texture *src_tex = texture_owner.get_or_null(p_from_texture); ERR_FAIL_NULL_V(src_tex, ERR_INVALID_PARAMETER); @@ -1853,6 +2013,9 @@ Error RenderingDevice::texture_resolve_multisample(RID p_from_texture, RID p_to_ // Indicate the texture will get modified for the shared texture fallback. _texture_update_shared_fallback(p_to_texture, dst_tex, true); + _check_transfer_worker_texture(src_tex); + _check_transfer_worker_texture(dst_tex); + // The textures must be mutable to be used in the resolve operation. bool src_made_mutable = _texture_make_mutable(src_tex, p_from_texture); bool dst_made_mutable = _texture_make_mutable(dst_tex, p_to_texture); @@ -1866,7 +2029,7 @@ Error RenderingDevice::texture_resolve_multisample(RID p_from_texture, RID p_to_ } Error RenderingDevice::texture_clear(RID p_texture, const Color &p_color, uint32_t p_base_mipmap, uint32_t p_mipmaps, uint32_t p_base_layer, uint32_t p_layers) { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD_V(ERR_UNAVAILABLE); Texture *src_tex = texture_owner.get_or_null(p_texture); ERR_FAIL_NULL_V(src_tex, ERR_INVALID_PARAMETER); @@ -1883,6 +2046,8 @@ Error RenderingDevice::texture_clear(RID p_texture, const Color &p_color, uint32 ERR_FAIL_COND_V(p_base_mipmap + p_mipmaps > src_tex->mipmaps, ERR_INVALID_PARAMETER); ERR_FAIL_COND_V(p_base_layer + p_layers > src_tex->layers, ERR_INVALID_PARAMETER); + _check_transfer_worker_texture(src_tex); + RDD::TextureSubresourceRange range; range.aspect = src_tex->read_aspect_flags; range.base_mipmap = src_tex->base_mipmap + p_base_mipmap; @@ -1906,8 +2071,6 @@ Error 
RenderingDevice::texture_clear(RID p_texture, const Color &p_color, uint32 bool RenderingDevice::texture_is_format_supported_for_usage(DataFormat p_format, BitField p_usage) const { ERR_FAIL_INDEX_V(p_format, DATA_FORMAT_MAX, false); - _THREAD_SAFE_METHOD_ - bool cpu_readable = (p_usage & RDD::TEXTURE_USAGE_CPU_READ_BIT); BitField supported = driver->texture_get_usages_supported_by_format(p_format, cpu_readable); bool any_unsupported = (((int64_t)supported) | ((int64_t)p_usage)) != ((int64_t)supported); @@ -2081,6 +2244,8 @@ RDD::RenderPassID RenderingDevice::_render_pass_create(const Vectorresolve_attachments.size(); j++) { int32_t attachment = pass->resolve_attachments[j]; + attachments[attachment].load_op = RDD::ATTACHMENT_LOAD_OP_DONT_CARE; + RDD::AttachmentReference reference; if (attachment == ATTACHMENT_UNUSED) { reference.attachment = RDD::AttachmentReference::UNUSED; @@ -2212,10 +2377,20 @@ RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create_ fb_format.pass_samples = samples; fb_format.view_count = p_view_count; framebuffer_formats[id] = fb_format; + +#if PRINT_FRAMEBUFFER_FORMAT + print_line("FRAMEBUFFER FORMAT:", id, "ATTACHMENTS:", p_attachments.size(), "PASSES:", p_passes.size()); + for (RD::AttachmentFormat attachment : p_attachments) { + print_line("FORMAT:", attachment.format, "SAMPLES:", attachment.samples, "USAGE FLAGS:", attachment.usage_flags); + } +#endif + return id; } RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create_empty(TextureSamples p_samples) { + _THREAD_SAFE_METHOD_ + FramebufferFormatKey key; key.passes.push_back(FramebufferPass()); @@ -2240,10 +2415,17 @@ RenderingDevice::FramebufferFormatID RenderingDevice::framebuffer_format_create_ fb_format.render_pass = render_pass; fb_format.pass_samples.push_back(p_samples); framebuffer_formats[id] = fb_format; + +#if PRINT_FRAMEBUFFER_FORMAT + print_line("FRAMEBUFFER FORMAT:", id, "ATTACHMENTS: EMPTY"); +#endif + return id; } 
RenderingDevice::TextureSamples RenderingDevice::framebuffer_format_get_texture_samples(FramebufferFormatID p_format, uint32_t p_pass) { + _THREAD_SAFE_METHOD_ + HashMap::Iterator E = framebuffer_formats.find(p_format); ERR_FAIL_COND_V(!E, TEXTURE_SAMPLES_1); ERR_FAIL_COND_V(p_pass >= uint32_t(E->value.pass_samples.size()), TEXTURE_SAMPLES_1); @@ -2253,6 +2435,7 @@ RenderingDevice::TextureSamples RenderingDevice::framebuffer_format_get_texture_ RID RenderingDevice::framebuffer_create_empty(const Size2i &p_size, TextureSamples p_samples, FramebufferFormatID p_format_check) { _THREAD_SAFE_METHOD_ + Framebuffer framebuffer; framebuffer.format_id = framebuffer_format_create_empty(p_samples); ERR_FAIL_COND_V(p_format_check != INVALID_FORMAT_ID && framebuffer.format_id != p_format_check, RID()); @@ -2276,6 +2459,10 @@ RID RenderingDevice::framebuffer_create(const Vector &p_texture_attachments ERR_FAIL_COND_V_MSG(texture && texture->layers != p_view_count, RID(), "Layers of our texture doesn't match view count for this framebuffer"); + if (texture != nullptr) { + _check_transfer_worker_texture(texture); + } + if (texture && texture->usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { pass.depth_attachment = i; } else if (texture && texture->usage_flags & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) { @@ -2310,6 +2497,8 @@ RID RenderingDevice::framebuffer_create_multipass(const Vector &p_texture_a } else { ERR_FAIL_COND_V_MSG(texture->layers != p_view_count, RID(), "Layers of our texture doesn't match view count for this framebuffer"); + _check_transfer_worker_texture(texture); + if (!size_set) { size.width = texture->width; size.height = texture->height; @@ -2409,10 +2598,10 @@ RID RenderingDevice::sampler_create(const SamplerState &p_state) { } bool RenderingDevice::sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_sampler_filter) const { - ERR_FAIL_INDEX_V(p_format, DATA_FORMAT_MAX, false); - _THREAD_SAFE_METHOD_ + ERR_FAIL_INDEX_V(p_format, 
DATA_FORMAT_MAX, false); + return driver->sampler_is_format_supported_for_filter(p_format, p_sampler_filter); } @@ -2421,8 +2610,6 @@ bool RenderingDevice::sampler_is_format_supported_for_filter(DataFormat p_format /***********************/ RID RenderingDevice::vertex_buffer_create(uint32_t p_size_bytes, const Vector &p_data, bool p_use_as_storage) { - _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID()); Buffer buffer; @@ -2441,10 +2628,12 @@ RID RenderingDevice::vertex_buffer_create(uint32_t p_size_bytes, const Vectortransfer_worker_index >= 0) { + vertex_array.transfer_worker_indices.push_back(buffer->transfer_worker_index); + vertex_array.transfer_worker_operations.push_back(buffer->transfer_worker_operation); + } } RID id = vertex_array_owner.make_rid(vertex_array); @@ -2555,8 +2749,6 @@ RID RenderingDevice::vertex_array_create(uint32_t p_vertex_count, VertexFormatID } RID RenderingDevice::index_buffer_create(uint32_t p_index_count, IndexBufferFormat p_format, const Vector &p_data, bool p_use_restart_indices) { - _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V(p_index_count == 0, RID()); IndexBuffer index_buffer; @@ -2605,10 +2797,12 @@ RID RenderingDevice::index_buffer_create(uint32_t p_index_count, IndexBufferForm } if (p_data.size()) { - _buffer_update(&index_buffer, RID(), 0, p_data.ptr(), p_data.size()); + _buffer_initialize(&index_buffer, p_data.ptr(), p_data.size()); } + _THREAD_SAFE_LOCK_ buffer_memory += index_buffer.size; + _THREAD_SAFE_UNLOCK_ RID id = index_buffer_owner.make_rid(index_buffer); #ifdef DEV_ENABLED @@ -2635,6 +2829,8 @@ RID RenderingDevice::index_array_create(RID p_index_buffer, uint32_t p_index_off index_array.indices = p_index_count; index_array.format = index_buffer->format; index_array.supports_restart_indices = index_buffer->supports_restart_indices; + index_array.transfer_worker_index = index_buffer->transfer_worker_index; + index_array.transfer_worker_operation = 
index_buffer->transfer_worker_operation; RID id = index_array_owner.make_rid(index_array); _add_dependency(id, p_index_buffer); @@ -2753,6 +2949,8 @@ RID RenderingDevice::shader_create_from_bytecode(const Vector &p_shader } RID RenderingDevice::shader_create_placeholder() { + _THREAD_SAFE_METHOD_ + Shader shader; return shader_owner.make_rid(shader); } @@ -2770,8 +2968,6 @@ uint64_t RenderingDevice::shader_get_vertex_input_attribute_mask(RID p_shader) { /******************/ RID RenderingDevice::uniform_buffer_create(uint32_t p_size_bytes, const Vector &p_data) { - _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID()); Buffer buffer; @@ -2787,10 +2983,12 @@ RID RenderingDevice::uniform_buffer_create(uint32_t p_size_bytes, const Vector &p_uniforms, RID p driver_uniform.ids.push_back(*sampler_driver_id); driver_uniform.ids.push_back(driver_id); + _check_transfer_worker_texture(texture); } } break; case UNIFORM_TYPE_TEXTURE: { @@ -2965,6 +3164,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner)); driver_uniform.ids.push_back(driver_id); + _check_transfer_worker_texture(texture); } } break; case UNIFORM_TYPE_IMAGE: { @@ -3008,6 +3208,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner)); driver_uniform.ids.push_back(texture->driver_id); + _check_transfer_worker_texture(texture); } } break; case UNIFORM_TYPE_TEXTURE_BUFFER: { @@ -3042,6 +3243,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p } driver_uniform.ids.push_back(buffer->driver_id); + _check_transfer_worker_buffer(buffer); } } break; case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: { @@ -3070,6 +3272,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p driver_uniform.ids.push_back(*sampler_driver_id); 
driver_uniform.ids.push_back(buffer->driver_id); + _check_transfer_worker_buffer(buffer); } } break; case UNIFORM_TYPE_IMAGE_BUFFER: { @@ -3094,6 +3297,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p } driver_uniform.ids.push_back(buffer->driver_id); + _check_transfer_worker_buffer(buffer); } break; case UNIFORM_TYPE_STORAGE_BUFFER: { ERR_FAIL_COND_V_MSG(uniform.get_id_count() != 1, RID(), @@ -3133,6 +3337,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p } driver_uniform.ids.push_back(buffer->driver_id); + _check_transfer_worker_buffer(buffer); } break; case UNIFORM_TYPE_INPUT_ATTACHMENT: { ERR_FAIL_COND_V_MSG(shader->is_compute, RID(), "InputAttachment (binding: " + itos(uniform.binding) + ") supplied for compute shader (this is not allowed)."); @@ -3158,6 +3363,7 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p DEV_ASSERT(!texture->owner.is_valid() || texture_owner.get_or_null(texture->owner)); driver_uniform.ids.push_back(texture->driver_id); + _check_transfer_worker_texture(texture); } } break; default: { @@ -3197,10 +3403,14 @@ RID RenderingDevice::uniform_set_create(const Vector &p_uniforms, RID p } bool RenderingDevice::uniform_set_is_valid(RID p_uniform_set) { + _THREAD_SAFE_METHOD_ + return uniform_set_owner.owns(p_uniform_set); } void RenderingDevice::uniform_set_set_invalidation_callback(RID p_uniform_set, InvalidationCallback p_callback, void *p_userdata) { + _THREAD_SAFE_METHOD_ + UniformSet *us = uniform_set_owner.get_or_null(p_uniform_set); ERR_FAIL_NULL(us); us->invalidated_callback = p_callback; @@ -3212,21 +3422,22 @@ void RenderingDevice::uniform_set_set_invalidation_callback(RID p_uniform_set, I /*******************/ RID RenderingDevice::render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const 
PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, BitField p_dynamic_state_flags, uint32_t p_for_render_pass, const Vector &p_specialization_constants) { - _THREAD_SAFE_METHOD_ - // Needs a shader. Shader *shader = shader_owner.get_or_null(p_shader); ERR_FAIL_NULL_V(shader, RID()); + ERR_FAIL_COND_V_MSG(shader->is_compute, RID(), "Compute shaders can't be used in render pipelines"); - ERR_FAIL_COND_V_MSG(shader->is_compute, RID(), - "Compute shaders can't be used in render pipelines"); + FramebufferFormat fb_format; + { + _THREAD_SAFE_METHOD_ - if (p_framebuffer_format == INVALID_ID) { - // If nothing provided, use an empty one (no attachments). - p_framebuffer_format = framebuffer_format_create(Vector()); + if (p_framebuffer_format == INVALID_ID) { + // If nothing provided, use an empty one (no attachments). + p_framebuffer_format = framebuffer_format_create(Vector()); + } + ERR_FAIL_COND_V(!framebuffer_formats.has(p_framebuffer_format), RID()); + fb_format = framebuffer_formats[p_framebuffer_format]; } - ERR_FAIL_COND_V(!framebuffer_formats.has(p_framebuffer_format), RID()); - const FramebufferFormat &fb_format = framebuffer_formats[p_framebuffer_format]; // Validate shader vs. framebuffer. { @@ -3372,30 +3583,41 @@ RID RenderingDevice::render_pipeline_create(RID p_shader, FramebufferFormatID p_ }; pipeline.validation.primitive_minimum = primitive_minimum[p_render_primitive]; #endif + // Create ID to associate with this pipeline. RID id = render_pipeline_owner.make_rid(pipeline); + { + _THREAD_SAFE_METHOD_ + #ifdef DEV_ENABLED - set_resource_name(id, "RID:" + itos(id.get_id())); + set_resource_name(id, "RID:" + itos(id.get_id())); #endif - // Now add all the dependencies. - _add_dependency(id, p_shader); + // Now add all the dependencies. 
+ _add_dependency(id, p_shader); + } + return id; } bool RenderingDevice::render_pipeline_is_valid(RID p_pipeline) { _THREAD_SAFE_METHOD_ + return render_pipeline_owner.owns(p_pipeline); } RID RenderingDevice::compute_pipeline_create(RID p_shader, const Vector &p_specialization_constants) { - _THREAD_SAFE_METHOD_ + Shader *shader; - // Needs a shader. - Shader *shader = shader_owner.get_or_null(p_shader); - ERR_FAIL_NULL_V(shader, RID()); + { + _THREAD_SAFE_METHOD_ - ERR_FAIL_COND_V_MSG(!shader->is_compute, RID(), - "Non-compute shaders can't be used in compute pipelines"); + // Needs a shader. + shader = shader_owner.get_or_null(p_shader); + ERR_FAIL_NULL_V(shader, RID()); + + ERR_FAIL_COND_V_MSG(!shader->is_compute, RID(), + "Non-compute shaders can't be used in compute pipelines"); + } for (int i = 0; i < shader->specialization_constants.size(); i++) { const ShaderSpecializationConstant &sc = shader->specialization_constants[i]; @@ -3427,15 +3649,22 @@ RID RenderingDevice::compute_pipeline_create(RID p_shader, const Vectorsurface_get_from_window(p_screen); ERR_FAIL_COND_V_MSG(surface == 0, 0, "A surface was not created for the screen."); return context->surface_get_width(surface); @@ -3522,6 +3752,7 @@ int RenderingDevice::screen_get_width(DisplayServer::WindowID p_screen) const { int RenderingDevice::screen_get_height(DisplayServer::WindowID p_screen) const { _THREAD_SAFE_METHOD_ + RenderingContextDriver::SurfaceID surface = context->surface_get_from_window(p_screen); ERR_FAIL_COND_V_MSG(surface == 0, 0, "A surface was not created for the screen."); return context->surface_get_height(surface); @@ -3568,7 +3799,7 @@ Error RenderingDevice::screen_free(DisplayServer::WindowID p_screen) { /*******************/ RenderingDevice::DrawListID RenderingDevice::draw_list_begin_for_screen(DisplayServer::WindowID p_screen, const Color &p_clear_color) { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD_V(INVALID_ID); ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only 
one draw list can be active at the same time."); ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time."); @@ -3594,8 +3825,8 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin_for_screen(DisplayS RDD::RenderPassID render_pass = driver->swap_chain_get_render_pass(sc_it->value); draw_graph.add_draw_list_begin(render_pass, fb_it->value, viewport, clear_value, true, false, RDD::BreadcrumbMarker::BLIT_PASS); - _draw_list_set_viewport(viewport); - _draw_list_set_scissor(viewport); + draw_graph.add_draw_list_set_viewport(viewport); + draw_graph.add_draw_list_set_scissor(viewport); return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT; } @@ -3707,14 +3938,6 @@ Error RenderingDevice::_draw_list_render_pass_begin(Framebuffer *p_framebuffer, return OK; } -void RenderingDevice::_draw_list_set_viewport(Rect2i p_rect) { - draw_graph.add_draw_list_set_viewport(p_rect); -} - -void RenderingDevice::_draw_list_set_scissor(Rect2i p_rect) { - draw_graph.add_draw_list_set_scissor(p_rect); -} - void RenderingDevice::_draw_list_insert_clear_region(DrawList *p_draw_list, Framebuffer *p_framebuffer, Point2i p_viewport_offset, Point2i p_viewport_size, bool p_clear_color, const Vector &p_clear_colors, bool p_clear_depth, float p_depth, uint32_t p_stencil) { LocalVector clear_attachments; int color_index = 0; @@ -3752,7 +3975,7 @@ void RenderingDevice::_draw_list_insert_clear_region(DrawList *p_draw_list, Fram } RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, uint32_t p_breadcrumb) { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD_V(INVALID_ID); ERR_FAIL_COND_V_MSG(draw_list != nullptr, INVALID_ID, "Only one draw list can be active at the same 
time."); @@ -3812,8 +4035,9 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin(RID p_framebuffer, #endif draw_list_current_subpass = 0; - _draw_list_set_viewport(Rect2i(viewport_offset, viewport_size)); - _draw_list_set_scissor(Rect2i(viewport_offset, viewport_size)); + Rect2i viewport_rect(viewport_offset, viewport_size); + draw_graph.add_draw_list_set_viewport(viewport_rect); + draw_graph.add_draw_list_set_scissor(viewport_rect); return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT; } @@ -3839,6 +4063,8 @@ RenderingDevice::DrawList *RenderingDevice::_get_draw_list_ptr(DrawListID p_id) } void RenderingDevice::draw_list_set_blend_constants(DrawListID p_list, const Color &p_color) { + ERR_RENDER_THREAD_GUARD(); + DrawList *dl = _get_draw_list_ptr(p_list); ERR_FAIL_NULL(dl); #ifdef DEBUG_ENABLED @@ -3849,6 +4075,8 @@ void RenderingDevice::draw_list_set_blend_constants(DrawListID p_list, const Col } void RenderingDevice::draw_list_bind_render_pipeline(DrawListID p_list, RID p_render_pipeline) { + ERR_RENDER_THREAD_GUARD(); + DrawList *dl = _get_draw_list_ptr(p_list); ERR_FAIL_NULL(dl); #ifdef DEBUG_ENABLED @@ -3877,23 +4105,35 @@ void RenderingDevice::draw_list_bind_render_pipeline(DrawListID p_list, RID p_re const uint32_t *pformats = pipeline->set_formats.ptr(); // Pipeline set formats. uint32_t first_invalid_set = UINT32_MAX; // All valid by default. 
- switch (driver->api_trait_get(RDD::API_TRAIT_SHADER_CHANGE_INVALIDATION)) { - case RDD::SHADER_CHANGE_INVALIDATION_ALL_BOUND_UNIFORM_SETS: { - first_invalid_set = 0; - } break; - case RDD::SHADER_CHANGE_INVALIDATION_INCOMPATIBLE_SETS_PLUS_CASCADE: { - for (uint32_t i = 0; i < pcount; i++) { - if (dl->state.sets[i].pipeline_expected_format != pformats[i]) { - first_invalid_set = i; - break; - } - } - } break; - case RDD::SHADER_CHANGE_INVALIDATION_ALL_OR_NONE_ACCORDING_TO_LAYOUT_HASH: { - if (dl->state.pipeline_shader_layout_hash != pipeline->shader_layout_hash) { + if (pipeline->push_constant_size != dl->state.pipeline_push_constant_size) { + // All sets must be invalidated as the pipeline layout is not compatible if the push constant range is different. + dl->state.pipeline_push_constant_size = pipeline->push_constant_size; + first_invalid_set = 0; + } else { + switch (driver->api_trait_get(RDD::API_TRAIT_SHADER_CHANGE_INVALIDATION)) { + case RDD::SHADER_CHANGE_INVALIDATION_ALL_BOUND_UNIFORM_SETS: { first_invalid_set = 0; - } - } break; + } break; + case RDD::SHADER_CHANGE_INVALIDATION_INCOMPATIBLE_SETS_PLUS_CASCADE: { + for (uint32_t i = 0; i < pcount; i++) { + if (dl->state.sets[i].pipeline_expected_format != pformats[i]) { + first_invalid_set = i; + break; + } + } + } break; + case RDD::SHADER_CHANGE_INVALIDATION_ALL_OR_NONE_ACCORDING_TO_LAYOUT_HASH: { + if (dl->state.pipeline_shader_layout_hash != pipeline->shader_layout_hash) { + first_invalid_set = 0; + } + } break; + } + } + + if (pipeline->push_constant_size) { +#ifdef DEBUG_ENABLED + dl->validation.pipeline_push_constant_supplied = false; +#endif } for (uint32_t i = 0; i < pcount; i++) { @@ -3908,12 +4148,6 @@ void RenderingDevice::draw_list_bind_render_pipeline(DrawListID p_list, RID p_re dl->state.set_count = pcount; // Update set count. 
- if (pipeline->push_constant_size) { -#ifdef DEBUG_ENABLED - dl->validation.pipeline_push_constant_supplied = false; -#endif - } - dl->state.pipeline_shader = pipeline->shader; dl->state.pipeline_shader_driver_id = pipeline->shader_driver_id; dl->state.pipeline_shader_layout_hash = pipeline->shader_layout_hash; @@ -3932,6 +4166,8 @@ void RenderingDevice::draw_list_bind_render_pipeline(DrawListID p_list, RID p_re } void RenderingDevice::draw_list_bind_uniform_set(DrawListID p_list, RID p_uniform_set, uint32_t p_index) { + ERR_RENDER_THREAD_GUARD(); + #ifdef DEBUG_ENABLED ERR_FAIL_COND_MSG(p_index >= driver->limit_get(LIMIT_MAX_BOUND_UNIFORM_SETS) || p_index >= MAX_UNIFORM_SETS, "Attempting to bind a descriptor set (" + itos(p_index) + ") greater than what the hardware supports (" + itos(driver->limit_get(LIMIT_MAX_BOUND_UNIFORM_SETS)) + ")."); @@ -3972,19 +4208,23 @@ void RenderingDevice::draw_list_bind_uniform_set(DrawListID p_list, RID p_unifor } void RenderingDevice::draw_list_bind_vertex_array(DrawListID p_list, RID p_vertex_array) { + ERR_RENDER_THREAD_GUARD(); + DrawList *dl = _get_draw_list_ptr(p_list); ERR_FAIL_NULL(dl); #ifdef DEBUG_ENABLED ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified."); #endif - const VertexArray *vertex_array = vertex_array_owner.get_or_null(p_vertex_array); + VertexArray *vertex_array = vertex_array_owner.get_or_null(p_vertex_array); ERR_FAIL_NULL(vertex_array); if (dl->state.vertex_array == p_vertex_array) { return; // Already set. 
} + _check_transfer_worker_vertex_array(vertex_array); + dl->state.vertex_array = p_vertex_array; #ifdef DEBUG_ENABLED @@ -4001,19 +4241,23 @@ void RenderingDevice::draw_list_bind_vertex_array(DrawListID p_list, RID p_verte } void RenderingDevice::draw_list_bind_index_array(DrawListID p_list, RID p_index_array) { + ERR_RENDER_THREAD_GUARD(); + DrawList *dl = _get_draw_list_ptr(p_list); ERR_FAIL_NULL(dl); #ifdef DEBUG_ENABLED ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified."); #endif - const IndexArray *index_array = index_array_owner.get_or_null(p_index_array); + IndexArray *index_array = index_array_owner.get_or_null(p_index_array); ERR_FAIL_NULL(index_array); if (dl->state.index_array == p_index_array) { return; // Already set. } + _check_transfer_worker_index_array(index_array); + dl->state.index_array = p_index_array; #ifdef DEBUG_ENABLED dl->validation.index_array_max_index = index_array->max_index; @@ -4029,6 +4273,8 @@ void RenderingDevice::draw_list_bind_index_array(DrawListID p_list, RID p_index_ } void RenderingDevice::draw_list_set_line_width(DrawListID p_list, float p_width) { + ERR_RENDER_THREAD_GUARD(); + DrawList *dl = _get_draw_list_ptr(p_list); ERR_FAIL_NULL(dl); #ifdef DEBUG_ENABLED @@ -4039,6 +4285,8 @@ void RenderingDevice::draw_list_set_line_width(DrawListID p_list, float p_width) } void RenderingDevice::draw_list_set_push_constant(DrawListID p_list, const void *p_data, uint32_t p_data_size) { + ERR_RENDER_THREAD_GUARD(); + DrawList *dl = _get_draw_list_ptr(p_list); ERR_FAIL_NULL(dl); @@ -4059,6 +4307,8 @@ void RenderingDevice::draw_list_set_push_constant(DrawListID p_list, const void } void RenderingDevice::draw_list_draw(DrawListID p_list, bool p_use_indices, uint32_t p_instances, uint32_t p_procedural_vertices) { + ERR_RENDER_THREAD_GUARD(); + DrawList *dl = _get_draw_list_ptr(p_list); ERR_FAIL_NULL(dl); #ifdef DEBUG_ENABLED @@ -4192,6 +4442,8 @@ void RenderingDevice::draw_list_draw(DrawListID 
p_list, bool p_use_indices, uint } void RenderingDevice::draw_list_enable_scissor(DrawListID p_list, const Rect2 &p_rect) { + ERR_RENDER_THREAD_GUARD(); + DrawList *dl = _get_draw_list_ptr(p_list); ERR_FAIL_NULL(dl); @@ -4207,25 +4459,30 @@ void RenderingDevice::draw_list_enable_scissor(DrawListID p_list, const Rect2 &p return; } - _draw_list_set_scissor(rect); + draw_graph.add_draw_list_set_scissor(rect); } void RenderingDevice::draw_list_disable_scissor(DrawListID p_list) { + ERR_RENDER_THREAD_GUARD(); + DrawList *dl = _get_draw_list_ptr(p_list); ERR_FAIL_NULL(dl); #ifdef DEBUG_ENABLED ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified."); #endif - _draw_list_set_scissor(dl->viewport); + draw_graph.add_draw_list_set_scissor(dl->viewport); } uint32_t RenderingDevice::draw_list_get_current_pass() { + ERR_RENDER_THREAD_GUARD_V(0); + return draw_list_current_subpass; } RenderingDevice::DrawListID RenderingDevice::draw_list_switch_to_next_pass() { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD_V(INVALID_ID); + ERR_FAIL_NULL_V(draw_list, INVALID_ID); ERR_FAIL_COND_V(draw_list_current_subpass >= draw_list_subpass_count - 1, INVALID_FORMAT_ID); @@ -4248,9 +4505,6 @@ Error RenderingDevice::draw_list_switch_to_next_pass_split(uint32_t p_splits, Dr #endif Error RenderingDevice::_draw_list_allocate(const Rect2i &p_viewport, uint32_t p_subpass) { - // Lock while draw_list is active. - _THREAD_SAFE_LOCK_ - draw_list = memnew(DrawList); draw_list->viewport = p_viewport; @@ -4264,13 +4518,10 @@ void RenderingDevice::_draw_list_free(Rect2i *r_last_viewport) { // Just end the list. memdelete(draw_list); draw_list = nullptr; - - // Draw_list is no longer active. 
- _THREAD_SAFE_UNLOCK_ } void RenderingDevice::draw_list_end() { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD(); ERR_FAIL_NULL_MSG(draw_list, "Immediate draw list is already inactive."); @@ -4297,13 +4548,10 @@ void RenderingDevice::draw_list_end() { /***********************/ RenderingDevice::ComputeListID RenderingDevice::compute_list_begin() { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD_V(INVALID_ID); ERR_FAIL_COND_V_MSG(compute_list != nullptr, INVALID_ID, "Only one draw/compute list can be active at the same time."); - // Lock while compute_list is active. - _THREAD_SAFE_LOCK_ - compute_list = memnew(ComputeList); draw_graph.add_compute_list_begin(); @@ -4312,7 +4560,7 @@ RenderingDevice::ComputeListID RenderingDevice::compute_list_begin() { } void RenderingDevice::compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline) { - // Must be called within a compute list, the class mutex is locked during that time + ERR_RENDER_THREAD_GUARD(); ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); ERR_FAIL_NULL(compute_list); @@ -4391,7 +4639,7 @@ void RenderingDevice::compute_list_bind_compute_pipeline(ComputeListID p_list, R } void RenderingDevice::compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index) { - // Must be called within a compute list, the class mutex is locked during that time + ERR_RENDER_THREAD_GUARD(); ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); ERR_FAIL_NULL(compute_list); @@ -4436,6 +4684,8 @@ void RenderingDevice::compute_list_bind_uniform_set(ComputeListID p_list, RID p_ } void RenderingDevice::compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size) { + ERR_RENDER_THREAD_GUARD(); + ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); ERR_FAIL_NULL(compute_list); ERR_FAIL_COND_MSG(p_data_size > MAX_PUSH_CONSTANT_SIZE, "Push constants can't be bigger than 128 bytes to maintain compatibility."); @@ -4463,7 +4713,7 @@ void 
RenderingDevice::compute_list_set_push_constant(ComputeListID p_list, const } void RenderingDevice::compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { - // Must be called within a compute list, the class mutex is locked during that time + ERR_RENDER_THREAD_GUARD(); ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); ERR_FAIL_NULL(compute_list); @@ -4551,6 +4801,8 @@ void RenderingDevice::compute_list_dispatch(ComputeListID p_list, uint32_t p_x_g } void RenderingDevice::compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads) { + ERR_RENDER_THREAD_GUARD(); + ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); ERR_FAIL_NULL(compute_list); @@ -4578,6 +4830,8 @@ void RenderingDevice::compute_list_dispatch_threads(ComputeListID p_list, uint32 } void RenderingDevice::compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset) { + ERR_RENDER_THREAD_GUARD(); + ERR_FAIL_COND(p_list != ID_TYPE_COMPUTE_LIST); ERR_FAIL_NULL(compute_list); @@ -4661,10 +4915,12 @@ void RenderingDevice::compute_list_dispatch_indirect(ComputeListID p_list, RID p if (buffer->draw_tracker != nullptr) { draw_graph.add_compute_list_usage(buffer->draw_tracker, RDG::RESOURCE_USAGE_INDIRECT_BUFFER_READ); } + + _check_transfer_worker_buffer(buffer); } void RenderingDevice::compute_list_add_barrier(ComputeListID p_list) { - // Must be called within a compute list, the class mutex is locked during that time + ERR_RENDER_THREAD_GUARD(); compute_list_barrier_state = compute_list->state; compute_list_end(); @@ -4686,15 +4942,14 @@ void RenderingDevice::compute_list_add_barrier(ComputeListID p_list) { } void RenderingDevice::compute_list_end() { + ERR_RENDER_THREAD_GUARD(); + ERR_FAIL_NULL(compute_list); draw_graph.add_compute_list_end(); memdelete(compute_list); compute_list = nullptr; - - // Compute_list is no longer active. 
- _THREAD_SAFE_UNLOCK_ } #ifndef DISABLE_DEPRECATED @@ -4707,6 +4962,282 @@ void RenderingDevice::full_barrier() { } #endif +/*************************/ +/**** TRANSFER WORKER ****/ +/*************************/ + +static uint32_t _get_alignment_offset(uint32_t p_offset, uint32_t p_required_align) { + uint32_t alignment_offset = (p_required_align > 0) ? (p_offset % p_required_align) : 0; + if (alignment_offset != 0) { + // If a particular alignment is required, add the offset as part of the required size. + alignment_offset = p_required_align - alignment_offset; + } + + return alignment_offset; +} + +RenderingDevice::TransferWorker *RenderingDevice::_acquire_transfer_worker(uint32_t p_transfer_size, uint32_t p_required_align, uint32_t &r_staging_offset) { + // Find the first worker that is not currently executing anything and has enough size for the transfer. + // If no workers are available, we make a new one. If we're not allowed to make new ones, we wait until one of them is available. + TransferWorker *transfer_worker = nullptr; + uint32_t available_list_index = 0; + bool transfer_worker_busy = true; + bool transfer_worker_full = true; + { + MutexLock pool_lock(transfer_worker_pool_mutex); + + // If no workers are available and we've reached the max pool capacity, wait until one of them becomes available. + bool transfer_worker_pool_full = transfer_worker_pool.size() >= transfer_worker_pool_max_size; + while (transfer_worker_pool_available_list.is_empty() && transfer_worker_pool_full) { + transfer_worker_pool_condition.wait(pool_lock); + } + + // Look at all available workers first. + for (uint32_t i = 0; i < transfer_worker_pool_available_list.size(); i++) { + uint32_t worker_index = transfer_worker_pool_available_list[i]; + TransferWorker *candidate_worker = transfer_worker_pool[worker_index]; + candidate_worker->thread_mutex.lock(); + + // Figure out if the worker can fit the transfer. 
+ uint32_t alignment_offset = _get_alignment_offset(candidate_worker->staging_buffer_size_in_use, p_required_align); + uint32_t required_size = candidate_worker->staging_buffer_size_in_use + p_transfer_size + alignment_offset; + bool candidate_worker_busy = candidate_worker->submitted; + bool candidate_worker_full = required_size > candidate_worker->staging_buffer_size_allocated; + bool pick_candidate = false; + if (!candidate_worker_busy && !candidate_worker_full) { + // A worker that can fit the transfer and is not waiting for a previous execution is the best possible candidate. + pick_candidate = true; + } else if (!candidate_worker_busy) { + // The worker can't fit the transfer but it's not currently doing anything. + // We pick it as a possible candidate if the current one is busy. + pick_candidate = transfer_worker_busy; + } else if (!candidate_worker_full) { + // The worker can fit the transfer but it's currently executing previous work. + // We pick it as a possible candidate if the current one is both busy and full. + pick_candidate = transfer_worker_busy && transfer_worker_full; + } else if (transfer_worker == nullptr) { + // The worker can't fit the transfer and it's currently executing work, so it's the worst candidate. + // We only pick if no candidate has been picked yet. + pick_candidate = true; + } + + if (pick_candidate) { + if (transfer_worker != nullptr) { + // Release the lock for the worker that was picked previously. + transfer_worker->thread_mutex.unlock(); + } + + // Keep the lock active for this worker. + transfer_worker = candidate_worker; + transfer_worker_busy = candidate_worker_busy; + transfer_worker_full = candidate_worker_full; + available_list_index = i; + + if (!transfer_worker_busy && !transfer_worker_full) { + // Best possible candidate, stop searching early. + break; + } + } else { + // Release the lock for the candidate. 
+ candidate_worker->thread_mutex.unlock(); + } + } + + if (transfer_worker != nullptr) { + // A worker was picked, remove it from the available list. + transfer_worker_pool_available_list.remove_at(available_list_index); + } else { + DEV_ASSERT(!transfer_worker_pool_full && "A transfer worker should never be created when the pool is full."); + + // No existing worker was picked, we create a new one. + transfer_worker = memnew(TransferWorker); + transfer_worker->command_fence = driver->fence_create(); + transfer_worker->command_semaphore = driver->semaphore_create(); + transfer_worker->command_pool = driver->command_pool_create(transfer_queue_family, RDD::COMMAND_BUFFER_TYPE_PRIMARY); + transfer_worker->command_buffer = driver->command_buffer_create(transfer_worker->command_pool); + transfer_worker->index = transfer_worker_pool.size(); + transfer_worker_pool.push_back(transfer_worker); + transfer_worker_operation_used_by_draw.push_back(0); + transfer_worker->thread_mutex.lock(); + } + } + + if (transfer_worker->submitted) { + // Wait for the worker if the command buffer was submitted but it hasn't finished processing yet. + _wait_for_transfer_worker(transfer_worker); + } + + uint32_t alignment_offset = _get_alignment_offset(transfer_worker->staging_buffer_size_in_use, p_required_align); + transfer_worker->max_transfer_size = MAX(transfer_worker->max_transfer_size, p_transfer_size); + + uint32_t required_size = transfer_worker->staging_buffer_size_in_use + p_transfer_size + alignment_offset; + if (required_size > transfer_worker->staging_buffer_size_allocated) { + // If there's not enough bytes to use on the staging buffer, we submit everything pending from the worker and wait for the work to be finished. 
+ if (transfer_worker->recording) { + _end_transfer_worker(transfer_worker); + _submit_transfer_worker(transfer_worker, false); + } + + if (transfer_worker->submitted) { + _wait_for_transfer_worker(transfer_worker); + } + + alignment_offset = 0; + + // If the staging buffer can't fit the transfer, we recreate the buffer. + const uint32_t expected_buffer_size_minimum = 16 * 1024; + uint32_t expected_buffer_size = MAX(transfer_worker->max_transfer_size, expected_buffer_size_minimum); + if (expected_buffer_size > transfer_worker->staging_buffer_size_allocated) { + if (transfer_worker->staging_buffer.id != 0) { + driver->buffer_free(transfer_worker->staging_buffer); + } + + uint32_t new_staging_buffer_size = next_power_of_2(expected_buffer_size); + transfer_worker->staging_buffer_size_allocated = new_staging_buffer_size; + transfer_worker->staging_buffer = driver->buffer_create(new_staging_buffer_size, RDD::BUFFER_USAGE_TRANSFER_FROM_BIT, RDD::MEMORY_ALLOCATION_TYPE_CPU); + } + } + + // Add the alignment before storing the offset that will be returned. + transfer_worker->staging_buffer_size_in_use += alignment_offset; + + // Store the offset to return and increment the current size. + r_staging_offset = transfer_worker->staging_buffer_size_in_use; + transfer_worker->staging_buffer_size_in_use += p_transfer_size; + + if (!transfer_worker->recording) { + // Begin the command buffer if the worker wasn't recording yet. 
+ driver->command_buffer_begin(transfer_worker->command_buffer); + transfer_worker->recording = true; + } + + return transfer_worker; +} + +void RenderingDevice::_release_transfer_worker(TransferWorker *p_transfer_worker) { + p_transfer_worker->thread_mutex.unlock(); + + transfer_worker_pool_mutex.lock(); + transfer_worker_pool_available_list.push_back(p_transfer_worker->index); + transfer_worker_pool_mutex.unlock(); + transfer_worker_pool_condition.notify_one(); +} + +void RenderingDevice::_end_transfer_worker(TransferWorker *p_transfer_worker) { + driver->command_buffer_end(p_transfer_worker->command_buffer); + p_transfer_worker->recording = false; +} + +void RenderingDevice::_submit_transfer_worker(TransferWorker *p_transfer_worker, bool p_signal_semaphore) { + const VectorView execute_semaphore = p_signal_semaphore ? p_transfer_worker->command_semaphore : VectorView(); + driver->command_queue_execute_and_present(transfer_queue, {}, p_transfer_worker->command_buffer, execute_semaphore, p_transfer_worker->command_fence, {}); + if (p_signal_semaphore) { + // Indicate the frame should wait on these semaphores before executing the main command buffer. 
+ frames[frame].semaphores_to_wait_on.push_back(p_transfer_worker->command_semaphore); + } + + p_transfer_worker->submitted = true; + + { + MutexLock lock(p_transfer_worker->operations_mutex); + p_transfer_worker->operations_submitted = p_transfer_worker->operations_counter; + } +} + +void RenderingDevice::_wait_for_transfer_worker(TransferWorker *p_transfer_worker) { + driver->fence_wait(p_transfer_worker->command_fence); + p_transfer_worker->staging_buffer_size_in_use = 0; + p_transfer_worker->submitted = false; + + { + MutexLock lock(p_transfer_worker->operations_mutex); + p_transfer_worker->operations_processed = p_transfer_worker->operations_submitted; + } +} + +void RenderingDevice::_check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation) { + TransferWorker *transfer_worker = transfer_worker_pool[p_transfer_worker_index]; + MutexLock lock(transfer_worker->operations_mutex); + uint64_t &dst_operation = transfer_worker_operation_used_by_draw[transfer_worker->index]; + dst_operation = MAX(dst_operation, p_transfer_worker_operation); +} + +void RenderingDevice::_check_transfer_worker_buffer(Buffer *p_buffer) { + if (p_buffer->transfer_worker_index >= 0) { + _check_transfer_worker_operation(p_buffer->transfer_worker_index, p_buffer->transfer_worker_operation); + p_buffer->transfer_worker_index = -1; + } +} + +void RenderingDevice::_check_transfer_worker_texture(Texture *p_texture) { + if (p_texture->transfer_worker_index >= 0) { + _check_transfer_worker_operation(p_texture->transfer_worker_index, p_texture->transfer_worker_operation); + p_texture->transfer_worker_index = -1; + } +} + +void RenderingDevice::_check_transfer_worker_vertex_array(VertexArray *p_vertex_array) { + if (!p_vertex_array->transfer_worker_indices.is_empty()) { + for (int i = 0; i < p_vertex_array->transfer_worker_indices.size(); i++) { + _check_transfer_worker_operation(p_vertex_array->transfer_worker_indices[i], 
p_vertex_array->transfer_worker_operations[i]); + } + + p_vertex_array->transfer_worker_indices.clear(); + p_vertex_array->transfer_worker_operations.clear(); + } +} + +void RenderingDevice::_check_transfer_worker_index_array(IndexArray *p_index_array) { + if (p_index_array->transfer_worker_index >= 0) { + _check_transfer_worker_operation(p_index_array->transfer_worker_index, p_index_array->transfer_worker_operation); + p_index_array->transfer_worker_index = -1; + } +} + +void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) { + MutexLock transfer_worker_lock(transfer_worker_pool_mutex); + for (TransferWorker *worker : transfer_worker_pool) { + if (p_operations_used_by_draw) { + MutexLock lock(worker->operations_mutex); + if (worker->operations_processed >= transfer_worker_operation_used_by_draw[worker->index]) { + // The operation used by the draw has already been processed, we don't need to wait on the worker. + continue; + } + } + + { + MutexLock lock(worker->thread_mutex); + if (worker->recording) { + _end_transfer_worker(worker); + _submit_transfer_worker(worker, true); + } + } + } +} + +void RenderingDevice::_wait_for_transfer_workers() { + MutexLock transfer_worker_lock(transfer_worker_pool_mutex); + for (TransferWorker *worker : transfer_worker_pool) { + MutexLock lock(worker->thread_mutex); + if (worker->submitted) { + _wait_for_transfer_worker(worker); + } + } +} + +void RenderingDevice::_free_transfer_workers() { + MutexLock transfer_worker_lock(transfer_worker_pool_mutex); + for (TransferWorker *worker : transfer_worker_pool) { + driver->semaphore_free(worker->command_semaphore); + driver->fence_free(worker->command_fence); + driver->buffer_free(worker->staging_buffer); + driver->command_pool_free(worker->command_pool); + memdelete(worker); + } + + transfer_worker_pool.clear(); +} + /***********************/ /**** COMMAND GRAPH ****/ /***********************/ @@ -4851,7 +5382,7 @@ bool 
RenderingDevice::_dependency_make_mutable(RID p_id, RID p_resource_id, RDG: } } -bool RenderingDevice::_dependencies_make_mutable(RID p_id, RDG::ResourceTracker *p_resource_tracker) { +bool RenderingDevice::_dependencies_make_mutable_recursive(RID p_id, RDG::ResourceTracker *p_resource_tracker) { bool made_mutable = false; HashMap>::Iterator E = dependency_map.find(p_id); if (E) { @@ -4863,12 +5394,17 @@ bool RenderingDevice::_dependencies_make_mutable(RID p_id, RDG::ResourceTracker return made_mutable; } +bool RenderingDevice::_dependencies_make_mutable(RID p_id, RDG::ResourceTracker *p_resource_tracker) { + _THREAD_SAFE_METHOD_ + return _dependencies_make_mutable_recursive(p_id, p_resource_tracker); +} + /**************************/ /**** FRAME MANAGEMENT ****/ /**************************/ void RenderingDevice::free(RID p_id) { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD(); _free_dependencies(p_id); // Recursively erase dependencies first, to avoid potential API problems. _free_internal(p_id); @@ -4886,6 +5422,8 @@ void RenderingDevice::_free_internal(RID p_id) { // Push everything so it's disposed of next time this frame index is processed (means, it's safe to do it). 
if (texture_owner.owns(p_id)) { Texture *texture = texture_owner.get_or_null(p_id); + _check_transfer_worker_texture(texture); + RDG::ResourceTracker *draw_tracker = texture->draw_tracker; if (draw_tracker != nullptr) { draw_tracker->reference_count--; @@ -4919,6 +5457,8 @@ void RenderingDevice::_free_internal(RID p_id) { sampler_owner.free(p_id); } else if (vertex_buffer_owner.owns(p_id)) { Buffer *vertex_buffer = vertex_buffer_owner.get_or_null(p_id); + _check_transfer_worker_buffer(vertex_buffer); + RDG::resource_tracker_free(vertex_buffer->draw_tracker); frames[frame].buffers_to_dispose_of.push_back(*vertex_buffer); vertex_buffer_owner.free(p_id); @@ -4926,6 +5466,8 @@ void RenderingDevice::_free_internal(RID p_id) { vertex_array_owner.free(p_id); } else if (index_buffer_owner.owns(p_id)) { IndexBuffer *index_buffer = index_buffer_owner.get_or_null(p_id); + _check_transfer_worker_buffer(index_buffer); + RDG::resource_tracker_free(index_buffer->draw_tracker); frames[frame].buffers_to_dispose_of.push_back(*index_buffer); index_buffer_owner.free(p_id); @@ -4939,16 +5481,22 @@ void RenderingDevice::_free_internal(RID p_id) { shader_owner.free(p_id); } else if (uniform_buffer_owner.owns(p_id)) { Buffer *uniform_buffer = uniform_buffer_owner.get_or_null(p_id); + _check_transfer_worker_buffer(uniform_buffer); + RDG::resource_tracker_free(uniform_buffer->draw_tracker); frames[frame].buffers_to_dispose_of.push_back(*uniform_buffer); uniform_buffer_owner.free(p_id); } else if (texture_buffer_owner.owns(p_id)) { Buffer *texture_buffer = texture_buffer_owner.get_or_null(p_id); + _check_transfer_worker_buffer(texture_buffer); + RDG::resource_tracker_free(texture_buffer->draw_tracker); frames[frame].buffers_to_dispose_of.push_back(*texture_buffer); texture_buffer_owner.free(p_id); } else if (storage_buffer_owner.owns(p_id)) { Buffer *storage_buffer = storage_buffer_owner.get_or_null(p_id); + _check_transfer_worker_buffer(storage_buffer); + 
RDG::resource_tracker_free(storage_buffer->draw_tracker); frames[frame].buffers_to_dispose_of.push_back(*storage_buffer); storage_buffer_owner.free(p_id); @@ -4980,6 +5528,8 @@ void RenderingDevice::_free_internal(RID p_id) { // The full list of resources that can be named is in the VkObjectType enum. // We just expose the resources that are owned and can be accessed easily. void RenderingDevice::set_resource_name(RID p_id, const String &p_name) { + _THREAD_SAFE_METHOD_ + if (texture_owner.owns(p_id)) { Texture *texture = texture_owner.get_or_null(p_id); driver->set_object_name(RDD::OBJECT_TYPE_TEXTURE, texture->driver_id, p_name); @@ -5026,6 +5576,8 @@ void RenderingDevice::set_resource_name(RID p_id, const String &p_name) { } void RenderingDevice::draw_command_begin_label(String p_label_name, const Color &p_color) { + ERR_RENDER_THREAD_GUARD(); + if (!context->is_debug_utils_enabled()) { return; } @@ -5040,6 +5592,8 @@ void RenderingDevice::draw_command_insert_label(String p_label_name, const Color #endif void RenderingDevice::draw_command_end_label() { + ERR_RENDER_THREAD_GUARD(); + draw_graph.end_label(); } @@ -5072,7 +5626,7 @@ String RenderingDevice::get_device_pipeline_cache_uuid() const { } void RenderingDevice::swap_buffers() { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD(); _end_frame(); _execute_frame(true); @@ -5083,18 +5637,20 @@ void RenderingDevice::swap_buffers() { } void RenderingDevice::submit() { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD(); ERR_FAIL_COND_MSG(is_main_instance, "Only local devices can submit and sync."); ERR_FAIL_COND_MSG(local_device_processing, "device already submitted, call sync to wait until done."); + _end_frame(); _execute_frame(false); local_device_processing = true; } void RenderingDevice::sync() { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD(); ERR_FAIL_COND_MSG(is_main_instance, "Only local devices can submit and sync."); ERR_FAIL_COND_MSG(!local_device_processing, "sync can only be called after a 
submit"); + _begin_frame(); local_device_processing = false; } @@ -5207,17 +5763,16 @@ uint64_t RenderingDevice::get_memory_usage(MemoryType p_type) const { void RenderingDevice::_begin_frame() { // Before beginning this frame, wait on the fence if it was signaled to make sure its work is finished. - if (frames[frame].draw_fence_signaled) { - driver->fence_wait(frames[frame].draw_fence); - frames[frame].draw_fence_signaled = false; + if (frames[frame].fence_signaled) { + driver->fence_wait(frames[frame].fence); + frames[frame].fence_signaled = false; } update_perf_report(); // Begin recording on the frame's command buffers. driver->begin_segment(frame, frames_drawn++); - driver->command_buffer_begin(frames[frame].setup_command_buffer); - driver->command_buffer_begin(frames[frame].draw_command_buffer); + driver->command_buffer_begin(frames[frame].command_buffer); // Reset the graph. draw_graph.begin(); @@ -5233,7 +5788,7 @@ void RenderingDevice::_begin_frame() { if (frames[frame].timestamp_count) { driver->timestamp_query_pool_get_results(frames[frame].timestamp_pool, frames[frame].timestamp_count, frames[frame].timestamp_result_values.ptr()); - driver->command_timestamp_query_pool_reset(frames[frame].setup_command_buffer, frames[frame].timestamp_pool, frames[frame].timestamp_count); + driver->command_timestamp_query_pool_reset(frames[frame].command_buffer, frames[frame].timestamp_pool, frames[frame].timestamp_count); SWAP(frames[frame].timestamp_names, frames[frame].timestamp_result_names); SWAP(frames[frame].timestamp_cpu_values, frames[frame].timestamp_cpu_result_values); } @@ -5252,10 +5807,10 @@ void RenderingDevice::_end_frame() { ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work)."); } - driver->command_buffer_end(frames[frame].setup_command_buffer); + _submit_transfer_workers(true); // The command buffer must be copied into a stack variable as the driver workarounds can change the 
command buffer in use. - RDD::CommandBufferID command_buffer = frames[frame].draw_command_buffer; + RDD::CommandBufferID command_buffer = frames[frame].command_buffer; draw_graph.end(RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS, command_buffer, frames[frame].command_buffer_pool); driver->command_buffer_end(command_buffer); driver->end_segment(); @@ -5268,9 +5823,6 @@ void RenderingDevice::_execute_frame(bool p_present) { thread_local LocalVector swap_chains; swap_chains.clear(); - // Execute the setup command buffer. - driver->command_queue_execute_and_present(main_queue, {}, frames[frame].setup_command_buffer, frames[frame].setup_semaphore, {}, {}); - // Execute command buffers and use semaphores to wait on the execution of the previous one. Normally there's only one command buffer, // but driver workarounds can force situations where there'll be more. uint32_t command_buffer_count = 1; @@ -5280,7 +5832,9 @@ void RenderingDevice::_execute_frame(bool p_present) { buffer_pool.buffers_used = 0; } - RDD::SemaphoreID wait_semaphore = frames[frame].setup_semaphore; + thread_local LocalVector wait_semaphores; + wait_semaphores = frames[frame].semaphores_to_wait_on; + for (uint32_t i = 0; i < command_buffer_count; i++) { RDD::CommandBufferID command_buffer; RDD::SemaphoreID signal_semaphore; @@ -5289,14 +5843,14 @@ void RenderingDevice::_execute_frame(bool p_present) { command_buffer = buffer_pool.buffers[i - 1]; signal_semaphore = buffer_pool.semaphores[i - 1]; } else { - command_buffer = frames[frame].draw_command_buffer; - signal_semaphore = frames[frame].draw_semaphore; + command_buffer = frames[frame].command_buffer; + signal_semaphore = frames[frame].semaphore; } bool signal_semaphore_valid; if (i == (command_buffer_count - 1)) { // This is the last command buffer, it should signal the fence. 
- signal_fence = frames[frame].draw_fence; + signal_fence = frames[frame].fence; signal_semaphore_valid = false; if (frame_can_present && separate_present_queue) { @@ -5311,19 +5865,21 @@ void RenderingDevice::_execute_frame(bool p_present) { signal_semaphore_valid = true; } - driver->command_queue_execute_and_present(main_queue, wait_semaphore, command_buffer, signal_semaphore_valid ? signal_semaphore : VectorView(), signal_fence, swap_chains); + driver->command_queue_execute_and_present(main_queue, wait_semaphores, command_buffer, signal_semaphore_valid ? signal_semaphore : VectorView(), signal_fence, swap_chains); // Make the next command buffer wait on the semaphore signaled by this one. - wait_semaphore = signal_semaphore; + wait_semaphores.resize(1); + wait_semaphores[0] = signal_semaphore; } // Indicate the fence has been signaled so the next time the frame's contents need to be used, the CPU needs to wait on the work to be completed. - frames[frame].draw_fence_signaled = true; + frames[frame].semaphores_to_wait_on.clear(); + frames[frame].fence_signaled = true; if (frame_can_present) { if (separate_present_queue) { // Issue the presentation separately if the presentation queue is different from the main queue. 
- driver->command_queue_execute_and_present(present_queue, wait_semaphore, {}, {}, {}, frames[frame].swap_chains_to_present); + driver->command_queue_execute_and_present(present_queue, wait_semaphores, {}, {}, {}, frames[frame].swap_chains_to_present); } frames[frame].swap_chains_to_present.clear(); @@ -5332,9 +5888,9 @@ void RenderingDevice::_execute_frame(bool p_present) { void RenderingDevice::_stall_for_previous_frames() { for (uint32_t i = 0; i < frames.size(); i++) { - if (frames[i].draw_fence_signaled) { - driver->fence_wait(frames[i].draw_fence); - frames[i].draw_fence_signaled = false; + if (frames[i].fence_signaled) { + driver->fence_wait(frames[i].fence); + frames[i].fence_signaled = false; } } } @@ -5347,8 +5903,9 @@ void RenderingDevice::_flush_and_stall_for_all_frames() { } Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServer::WindowID p_main_window) { - Error err; + ERR_RENDER_THREAD_GUARD_V(ERR_UNAVAILABLE); + Error err; RenderingContextDriver::SurfaceID main_surface = 0; is_main_instance = (singleton == this) && (p_main_window != DisplayServer::INVALID_WINDOW_ID); if (p_main_window != DisplayServer::INVALID_WINDOW_ID) { @@ -5436,12 +5993,25 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ main_queue = driver->command_queue_create(main_queue_family, true); ERR_FAIL_COND_V(!main_queue, FAILED); + transfer_queue_family = driver->command_queue_family_get(RDD::COMMAND_QUEUE_FAMILY_TRANSFER_BIT); + if (transfer_queue_family) { + // Create the transfer queue. + transfer_queue = driver->command_queue_create(transfer_queue_family); + ERR_FAIL_COND_V(!transfer_queue, FAILED); + } else { + // Use main queue as the transfer queue. + transfer_queue = main_queue; + transfer_queue_family = main_queue_family; + } + if (present_queue_family) { - // Create the presentation queue. + // Create the present queue. 
present_queue = driver->command_queue_create(present_queue_family); ERR_FAIL_COND_V(!present_queue, FAILED); } else { + // Use main queue as the present queue. present_queue = main_queue; + present_queue_family = main_queue_family; } // Create data for all the frames. @@ -5451,17 +6021,13 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ // Create command pool, command buffers, semaphores and fences. frames[i].command_pool = driver->command_pool_create(main_queue_family, RDD::COMMAND_BUFFER_TYPE_PRIMARY); ERR_FAIL_COND_V(!frames[i].command_pool, FAILED); - frames[i].setup_command_buffer = driver->command_buffer_create(frames[i].command_pool); - ERR_FAIL_COND_V(!frames[i].setup_command_buffer, FAILED); - frames[i].draw_command_buffer = driver->command_buffer_create(frames[i].command_pool); - ERR_FAIL_COND_V(!frames[i].draw_command_buffer, FAILED); - frames[i].setup_semaphore = driver->semaphore_create(); - ERR_FAIL_COND_V(!frames[i].setup_semaphore, FAILED); - frames[i].draw_semaphore = driver->semaphore_create(); - ERR_FAIL_COND_V(!frames[i].draw_semaphore, FAILED); - frames[i].draw_fence = driver->fence_create(); - ERR_FAIL_COND_V(!frames[i].draw_fence, FAILED); - frames[i].draw_fence_signaled = false; + frames[i].command_buffer = driver->command_buffer_create(frames[i].command_pool); + ERR_FAIL_COND_V(!frames[i].command_buffer, FAILED); + frames[i].semaphore = driver->semaphore_create(); + ERR_FAIL_COND_V(!frames[i].semaphore, FAILED); + frames[i].fence = driver->fence_create(); + ERR_FAIL_COND_V(!frames[i].fence, FAILED); + frames[i].fence_signaled = false; // Create query pool. frames[i].timestamp_pool = driver->timestamp_query_pool_create(max_timestamp_query_elements); @@ -5482,8 +6048,7 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ // Initialize recording on the first frame. 
driver->begin_segment(frame, frames_drawn++); - driver->command_buffer_begin(frames[0].setup_command_buffer); - driver->command_buffer_begin(frames[0].draw_command_buffer); + driver->command_buffer_begin(frames[0].command_buffer); // Create draw graph and start it initialized as well. draw_graph.initialize(driver, device, frames.size(), main_queue_family, SECONDARY_COMMAND_BUFFERS_PER_FRAME); @@ -5491,7 +6056,7 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ for (uint32_t i = 0; i < frames.size(); i++) { // Reset all queries in a query pool before doing any operations with them.. - driver->command_timestamp_query_pool_reset(frames[0].setup_command_buffer, frames[i].timestamp_pool, max_timestamp_query_elements); + driver->command_timestamp_query_pool_reset(frames[0].command_buffer, frames[i].timestamp_pool, max_timestamp_query_elements); } // Convert block size from KB. @@ -5522,6 +6087,9 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ ERR_FAIL_COND_V(err, FAILED); } + // TODO: How should this max size be determined? + transfer_worker_pool_max_size = OS::get_singleton()->get_processor_count(); + draw_list = nullptr; compute_list = nullptr; @@ -5560,6 +6128,8 @@ Vector RenderingDevice::_load_pipeline_cache() { } void RenderingDevice::_update_pipeline_cache(bool p_closing) { + _THREAD_SAFE_METHOD_ + { bool still_saving = pipeline_cache_save_task != WorkerThreadPool::INVALID_TASK_ID && !WorkerThreadPool::get_singleton()->is_task_completed(pipeline_cache_save_task); if (still_saving) { @@ -5641,6 +6211,8 @@ void RenderingDevice::_free_rids(T &p_owner, const char *p_type) { } void RenderingDevice::capture_timestamp(const String &p_name) { + ERR_RENDER_THREAD_GUARD(); + ERR_FAIL_COND_MSG(draw_list != nullptr && draw_list->state.draw_count > 0, "Capturing timestamps during draw list creation is not allowed. 
Offending timestamp was: " + p_name); ERR_FAIL_COND_MSG(compute_list != nullptr && compute_list->state.dispatch_count > 0, "Capturing timestamps during compute list creation is not allowed. Offending timestamp was: " + p_name); ERR_FAIL_COND_MSG(frames[frame].timestamp_count >= max_timestamp_query_elements, vformat("Tried capturing more timestamps than the configured maximum (%d). You can increase this limit in the project settings under 'Debug/Settings' called 'Max Timestamp Query Elements'.", max_timestamp_query_elements)); @@ -5653,7 +6225,7 @@ void RenderingDevice::capture_timestamp(const String &p_name) { } uint64_t RenderingDevice::get_driver_resource(DriverResource p_resource, RID p_rid, uint64_t p_index) { - _THREAD_SAFE_METHOD_ + ERR_RENDER_THREAD_GUARD_V(0); uint64_t driver_id = 0; switch (p_resource) { @@ -5769,19 +6341,23 @@ uint64_t RenderingDevice::get_device_allocs_by_object_type(uint32_t type) const } uint32_t RenderingDevice::get_captured_timestamps_count() const { + ERR_RENDER_THREAD_GUARD_V(0); return frames[frame].timestamp_result_count; } uint64_t RenderingDevice::get_captured_timestamps_frame() const { + ERR_RENDER_THREAD_GUARD_V(0); return frames[frame].index; } uint64_t RenderingDevice::get_captured_timestamp_gpu_time(uint32_t p_index) const { + ERR_RENDER_THREAD_GUARD_V(0); ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, 0); return driver->timestamp_query_result_to_time(frames[frame].timestamp_result_values[p_index]); } uint64_t RenderingDevice::get_captured_timestamp_cpu_time(uint32_t p_index) const { + ERR_RENDER_THREAD_GUARD_V(0); ERR_FAIL_UNSIGNED_INDEX_V(p_index, frames[frame].timestamp_result_count, 0); return frames[frame].timestamp_cpu_result_values[p_index]; } @@ -5796,11 +6372,17 @@ uint64_t RenderingDevice::limit_get(Limit p_limit) const { } void RenderingDevice::finalize() { + ERR_RENDER_THREAD_GUARD(); + if (!frames.is_empty()) { // Wait for all frames to have finished rendering. 
_flush_and_stall_for_all_frames(); } + // Wait for transfer workers to finish. + _submit_transfer_workers(false); + _wait_for_transfer_workers(); + // Delete everything the graph has created. draw_graph.finalize(); @@ -5854,15 +6436,17 @@ void RenderingDevice::finalize() { } } + // Erase the transfer workers after all resources have been freed. + _free_transfer_workers(); + // Free everything pending. for (uint32_t i = 0; i < frames.size(); i++) { int f = (frame + i) % frames.size(); _free_pending_resources(f); driver->command_pool_free(frames[i].command_pool); driver->timestamp_query_pool_free(frames[i].timestamp_pool); - driver->semaphore_free(frames[i].setup_semaphore); - driver->semaphore_free(frames[i].draw_semaphore); - driver->fence_free(frames[i].draw_fence); + driver->semaphore_free(frames[i].semaphore); + driver->fence_free(frames[i].fence); RDG::CommandBufferPool &buffer_pool = frames[i].command_buffer_pool; for (uint32_t j = 0; j < buffer_pool.buffers.size(); j++) { @@ -5909,6 +6493,15 @@ void RenderingDevice::finalize() { present_queue = RDD::CommandQueueID(); } + if (transfer_queue) { + if (main_queue != transfer_queue) { + // Only delete the transfer queue if it's unique. 
+ driver->command_queue_free(transfer_queue); + } + + transfer_queue = RDD::CommandQueueID(); + } + if (main_queue) { driver->command_queue_free(main_queue); main_queue = RDD::CommandQueueID(); @@ -6644,6 +7237,8 @@ RenderingDevice::RenderingDevice() { if (singleton == nullptr) { singleton = this; } + + render_thread_id = Thread::get_caller_id(); } /*****************/ diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index d8bf84575601..19d21dfda215 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -33,6 +33,7 @@ #include "core/object/class_db.h" #include "core/object/worker_thread_pool.h" +#include "core/os/condition_variable.h" #include "core/os/thread_safe.h" #include "core/templates/local_vector.h" #include "core/templates/oa_hash_map.h" @@ -62,6 +63,10 @@ class RenderingDevice : public RenderingDeviceCommons { GDCLASS(RenderingDevice, Object) _THREAD_SAFE_CLASS_ + +private: + Thread::ID render_thread_id; + public: enum ShaderLanguage { SHADER_LANGUAGE_GLSL, @@ -178,10 +183,12 @@ class RenderingDevice : public RenderingDeviceCommons { uint32_t size = 0; BitField usage; RDG::ResourceTracker *draw_tracker = nullptr; + int32_t transfer_worker_index = -1; + uint64_t transfer_worker_operation = 0; }; Buffer *_get_buffer_from_owner(RID p_buffer); - Error _buffer_update(Buffer *p_buffer, RID p_buffer_id, size_t p_offset, const uint8_t *p_data, size_t p_data_size, bool p_use_draw_queue = false, uint32_t p_required_align = 32); + Error _buffer_initialize(Buffer *p_buffer, const uint8_t *p_data, size_t p_data_size, uint32_t p_required_align = 32); void update_perf_report(); @@ -189,9 +196,9 @@ class RenderingDevice : public RenderingDeviceCommons { uint32_t copy_bytes_count = 0; String perf_report_text; - RID_Owner uniform_buffer_owner; - RID_Owner storage_buffer_owner; - RID_Owner texture_buffer_owner; + RID_Owner uniform_buffer_owner; + RID_Owner storage_buffer_owner; + RID_Owner 
texture_buffer_owner; public: Error buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size); @@ -254,6 +261,8 @@ class RenderingDevice : public RenderingDeviceCommons { RDG::ResourceTracker *draw_tracker = nullptr; HashMap slice_trackers; SharedFallback *shared_fallback = nullptr; + int32_t transfer_worker_index = -1; + uint64_t transfer_worker_operation = 0; RDD::TextureSubresourceRange barrier_range() const { RDD::TextureSubresourceRange r; @@ -282,11 +291,13 @@ class RenderingDevice : public RenderingDeviceCommons { } }; - RID_Owner texture_owner; + RID_Owner texture_owner; uint32_t texture_upload_region_size_px = 0; Vector _texture_get_data(Texture *tex, uint32_t p_layer, bool p_2d = false); - Error _texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data, bool p_use_setup_queue, bool p_validate_can_update); + uint32_t _texture_layer_count(Texture *p_texture) const; + uint32_t _texture_alignment(Texture *p_texture) const; + Error _texture_initialize(RID p_texture, uint32_t p_layer, const Vector &p_data); void _texture_check_shared_fallback(Texture *p_texture); void _texture_update_shared_fallback(RID p_texture_rid, Texture *p_texture, bool p_for_writing); void _texture_free_shared_fallback(Texture *p_texture); @@ -572,7 +583,7 @@ class RenderingDevice : public RenderingDeviceCommons { uint32_t view_count; }; - RID_Owner framebuffer_owner; + RID_Owner framebuffer_owner; public: // This ID is warranted to be unique for the same formats, does not need to be freed @@ -593,7 +604,7 @@ class RenderingDevice : public RenderingDeviceCommons { /**** SAMPLER ****/ /*****************/ private: - RID_Owner sampler_owner; + RID_Owner sampler_owner; public: RID sampler_create(const SamplerState &p_state); @@ -615,7 +626,7 @@ class RenderingDevice : public RenderingDeviceCommons { // This mapping is done here internally, and it's not // exposed. 
- RID_Owner vertex_buffer_owner; + RID_Owner vertex_buffer_owner; struct VertexDescriptionKey { Vector vertex_formats; @@ -695,10 +706,12 @@ class RenderingDevice : public RenderingDeviceCommons { Vector buffers; // Not owned, just referenced. Vector draw_trackers; // Not owned, just referenced. Vector offsets; + Vector transfer_worker_indices; + Vector transfer_worker_operations; HashSet untracked_buffers; }; - RID_Owner vertex_array_owner; + RID_Owner vertex_array_owner; struct IndexBuffer : public Buffer { uint32_t max_index = 0; // Used for validation. @@ -707,7 +720,7 @@ class RenderingDevice : public RenderingDeviceCommons { bool supports_restart_indices = false; }; - RID_Owner index_buffer_owner; + RID_Owner index_buffer_owner; struct IndexArray { uint32_t max_index = 0; // Remember the maximum index here too, for validation. @@ -717,9 +730,11 @@ class RenderingDevice : public RenderingDeviceCommons { uint32_t indices = 0; IndexBufferFormat format = INDEX_BUFFER_FORMAT_UINT16; bool supports_restart_indices = false; + int32_t transfer_worker_index = -1; + uint64_t transfer_worker_operation = 0; }; - RID_Owner index_array_owner; + RID_Owner index_array_owner; public: RID vertex_buffer_create(uint32_t p_size_bytes, const Vector &p_data = Vector(), bool p_use_as_storage = false); @@ -796,7 +811,7 @@ class RenderingDevice : public RenderingDeviceCommons { String _shader_uniform_debug(RID p_shader, int p_set = -1); - RID_Owner shader_owner; + RID_Owner shader_owner; #ifndef DISABLE_DEPRECATED public: @@ -977,7 +992,7 @@ class RenderingDevice : public RenderingDeviceCommons { void *invalidated_callback_userdata = nullptr; }; - RID_Owner uniform_set_owner; + RID_Owner uniform_set_owner; void _uniform_set_update_shared(UniformSet *p_uniform_set); @@ -1024,7 +1039,7 @@ class RenderingDevice : public RenderingDeviceCommons { uint32_t push_constant_size = 0; }; - RID_Owner render_pipeline_owner; + RID_Owner render_pipeline_owner; bool pipeline_cache_enabled = false; 
size_t pipeline_cache_size = 0; @@ -1045,7 +1060,7 @@ class RenderingDevice : public RenderingDeviceCommons { uint32_t local_group_size[3] = { 0, 0, 0 }; }; - RID_Owner compute_pipeline_owner; + RID_Owner compute_pipeline_owner; public: RID render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, BitField p_dynamic_state_flags = 0, uint32_t p_for_render_pass = 0, const Vector &p_specialization_constants = Vector()); @@ -1101,6 +1116,7 @@ class RenderingDevice : public RenderingDeviceCommons { RID pipeline_shader; RDD::ShaderID pipeline_shader_driver_id; uint32_t pipeline_shader_layout_hash = 0; + uint32_t pipeline_push_constant_size = 0; RID vertex_array; RID index_array; uint32_t draw_count = 0; @@ -1153,8 +1169,6 @@ class RenderingDevice : public RenderingDeviceCommons { void _draw_list_insert_clear_region(DrawList *p_draw_list, Framebuffer *p_framebuffer, Point2i p_viewport_offset, Point2i p_viewport_size, bool p_clear_color, const Vector &p_clear_colors, bool p_clear_depth, float p_depth, uint32_t p_stencil); Error _draw_list_setup_framebuffer(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, RDD::FramebufferID *r_framebuffer, RDD::RenderPassID *r_render_pass, uint32_t *r_subpass_count); Error _draw_list_render_pass_begin(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_colors, float p_clear_depth, uint32_t p_clear_stencil, Point2i p_viewport_offset, Point2i p_viewport_size, RDD::FramebufferID 
p_framebuffer_driver_id, RDD::RenderPassID p_render_pass, uint32_t p_breadcrumb); - void _draw_list_set_viewport(Rect2i p_rect); - void _draw_list_set_scissor(Rect2i p_rect); _FORCE_INLINE_ DrawList *_get_draw_list_ptr(DrawListID p_id); Error _draw_list_allocate(const Rect2i &p_viewport, uint32_t p_subpass); void _draw_list_free(Rect2i *r_last_viewport = nullptr); @@ -1240,6 +1254,50 @@ class RenderingDevice : public RenderingDeviceCommons { void compute_list_end(); private: + /*************************/ + /**** TRANSFER WORKER ****/ + /*************************/ + + struct TransferWorker { + uint32_t index = 0; + RDD::BufferID staging_buffer; + uint32_t max_transfer_size = 0; + uint32_t staging_buffer_size_in_use = 0; + uint32_t staging_buffer_size_allocated = 0; + RDD::CommandBufferID command_buffer; + RDD::CommandPoolID command_pool; + RDD::FenceID command_fence; + RDD::SemaphoreID command_semaphore; + bool recording = false; + bool submitted = false; + BinaryMutex thread_mutex; + uint64_t operations_processed = 0; + uint64_t operations_submitted = 0; + uint64_t operations_counter = 0; + BinaryMutex operations_mutex; + }; + + LocalVector transfer_worker_pool; + uint32_t transfer_worker_pool_max_size = 1; + LocalVector transfer_worker_operation_used_by_draw; + LocalVector transfer_worker_pool_available_list; + BinaryMutex transfer_worker_pool_mutex; + ConditionVariable transfer_worker_pool_condition; + + TransferWorker *_acquire_transfer_worker(uint32_t p_transfer_size, uint32_t p_required_align, uint32_t &r_staging_offset); + void _release_transfer_worker(TransferWorker *p_transfer_worker); + void _end_transfer_worker(TransferWorker *p_transfer_worker); + void _submit_transfer_worker(TransferWorker *p_transfer_worker, bool p_signal_semaphore); + void _wait_for_transfer_worker(TransferWorker *p_transfer_worker); + void _check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation); + void 
_check_transfer_worker_buffer(Buffer *p_buffer); + void _check_transfer_worker_texture(Texture *p_texture); + void _check_transfer_worker_vertex_array(VertexArray *p_vertex_array); + void _check_transfer_worker_index_array(IndexArray *p_index_array); + void _submit_transfer_workers(bool p_operations_used_by_draw); + void _wait_for_transfer_workers(); + void _free_transfer_workers(); + /***********************/ /**** COMMAND GRAPH ****/ /***********************/ @@ -1250,6 +1308,7 @@ class RenderingDevice : public RenderingDeviceCommons { bool _index_array_make_mutable(IndexArray *p_index_array, RDG::ResourceTracker *p_resource_tracker); bool _uniform_set_make_mutable(UniformSet *p_uniform_set, RID p_resource_id, RDG::ResourceTracker *p_resource_tracker); bool _dependency_make_mutable(RID p_id, RID p_resource_id, RDG::ResourceTracker *p_resource_tracker); + bool _dependencies_make_mutable_recursive(RID p_id, RDG::ResourceTracker *p_resource_tracker); bool _dependencies_make_mutable(RID p_id, RDG::ResourceTracker *p_resource_tracker); RenderingDeviceGraph draw_graph; @@ -1259,8 +1318,10 @@ class RenderingDevice : public RenderingDeviceCommons { /**************************/ RDD::CommandQueueFamilyID main_queue_family; + RDD::CommandQueueFamilyID transfer_queue_family; RDD::CommandQueueFamilyID present_queue_family; RDD::CommandQueueID main_queue; + RDD::CommandQueueID transfer_queue; RDD::CommandQueueID present_queue; /**************************/ @@ -1292,28 +1353,21 @@ class RenderingDevice : public RenderingDeviceCommons { List render_pipelines_to_dispose_of; List compute_pipelines_to_dispose_of; + // The command pool used by the command buffer. RDD::CommandPoolID command_pool; - // Used at the beginning of every frame for set-up. - // Used for filling up newly created buffers with data provided on creation. - // Primarily intended to be accessed by worker threads. - // Ideally this command buffer should use an async transfer queue. 
- RDD::CommandBufferID setup_command_buffer; - - // The main command buffer for drawing and compute. - // Primarily intended to be used by the main thread to do most stuff. - RDD::CommandBufferID draw_command_buffer; + // The command buffer used by the main thread when recording the frame. + RDD::CommandBufferID command_buffer; - // Signaled by the setup submission. Draw must wait on this semaphore. - RDD::SemaphoreID setup_semaphore; + // Signaled by the command buffer submission. Present must wait on this semaphore. + RDD::SemaphoreID semaphore; - // Signaled by the draw submission. Present must wait on this semaphore. - RDD::SemaphoreID draw_semaphore; + // Signaled by the command buffer submission. Must wait on this fence before beginning command recording for the frame. + RDD::FenceID fence; + bool fence_signaled = false; - // Signaled by the draw submission. Must wait on this fence before beginning - // command recording for the frame. - RDD::FenceID draw_fence; - bool draw_fence_signaled = false; + // Semaphores the frame must wait on before executing the command buffer. + LocalVector semaphores_to_wait_on; // Swap chains prepared for drawing during the frame that must be presented. 
LocalVector swap_chains_to_present; diff --git a/servers/rendering/rendering_device_commons.cpp b/servers/rendering/rendering_device_commons.cpp index 4dbd0e396476..03fad5493a01 100644 --- a/servers/rendering/rendering_device_commons.cpp +++ b/servers/rendering/rendering_device_commons.cpp @@ -600,7 +600,7 @@ void RenderingDeviceCommons::get_compressed_image_format_block_dimensions(DataFo } } -uint32_t RenderingDeviceCommons::get_compressed_image_format_block_byte_size(DataFormat p_format) { +uint32_t RenderingDeviceCommons::get_compressed_image_format_block_byte_size(DataFormat p_format) const { switch (p_format) { case DATA_FORMAT_BC1_RGB_UNORM_BLOCK: case DATA_FORMAT_BC1_RGB_SRGB_BLOCK: diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index d72265958cb5..d516d968af62 100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -893,7 +893,7 @@ class RenderingDeviceCommons : public Object { static uint32_t get_image_format_pixel_size(DataFormat p_format); static void get_compressed_image_format_block_dimensions(DataFormat p_format, uint32_t &r_w, uint32_t &r_h); - uint32_t get_compressed_image_format_block_byte_size(DataFormat p_format); + uint32_t get_compressed_image_format_block_byte_size(DataFormat p_format) const; static uint32_t get_compressed_image_format_pixel_rshift(DataFormat p_format); static uint32_t get_image_format_required_size(DataFormat p_format, uint32_t p_width, uint32_t p_height, uint32_t p_depth, uint32_t p_mipmaps, uint32_t *r_blockw = nullptr, uint32_t *r_blockh = nullptr, uint32_t *r_depth = nullptr); static uint32_t get_image_required_mipmaps(uint32_t p_width, uint32_t p_height, uint32_t p_depth); diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index 97c84c9d05f9..91da67c8d720 100644 --- a/servers/rendering/rendering_device_driver.h +++ 
b/servers/rendering/rendering_device_driver.h @@ -104,14 +104,14 @@ struct VersatileResourceTemplate { uint8_t data[MAX_RESOURCE_SIZE]; template <typename T> - static T *allocate(PagedAllocator<VersatileResourceTemplate> &p_allocator) { + static T *allocate(PagedAllocator<VersatileResourceTemplate, true> &p_allocator) { T *obj = (T *)p_allocator.alloc(); memnew_placement(obj, T); return obj; } template <typename T> - static void free(PagedAllocator<VersatileResourceTemplate> &p_allocator, T *p_object) { + static void free(PagedAllocator<VersatileResourceTemplate, true> &p_allocator, T *p_object) { p_object->~T(); p_allocator.free((VersatileResourceTemplate *)p_object); } diff --git a/servers/rendering/rendering_method.h b/servers/rendering/rendering_method.h index f6212faf085d..4c277ac21526 100644 --- a/servers/rendering/rendering_method.h +++ b/servers/rendering/rendering_method.h @@ -118,6 +118,11 @@ class RenderingMethod { virtual Variant instance_geometry_get_shader_parameter(RID p_instance, const StringName &p_parameter) const = 0; virtual Variant instance_geometry_get_shader_parameter_default_value(RID p_instance, const StringName &p_parameter) const = 0; + /* PIPELINES */ + + virtual void mesh_generate_pipelines(RID p_mesh, bool p_background_compilation) = 0; + virtual uint32_t get_pipeline_compilations(RS::PipelineSource p_source) = 0; + /* SKY API */ virtual RID sky_allocate() = 0; diff --git a/servers/rendering/rendering_server_default.cpp b/servers/rendering/rendering_server_default.cpp index b994ebf33747..20f1f9ad6f5b 100644 --- a/servers/rendering/rendering_server_default.cpp +++ b/servers/rendering/rendering_server_default.cpp @@ -281,6 +281,16 @@ uint64_t RenderingServerDefault::get_rendering_info(RenderingInfo p_info) { return RSG::viewport->get_total_primitives_drawn(); } else if (p_info == RENDERING_INFO_TOTAL_DRAW_CALLS_IN_FRAME) { return RSG::viewport->get_total_draw_calls_used(); + } else if (p_info == RENDERING_INFO_PIPELINE_COMPILATIONS_CANVAS) { + return RSG::canvas_render->get_pipeline_compilations(PIPELINE_SOURCE_CANVAS); + } else if (p_info == 
RENDERING_INFO_PIPELINE_COMPILATIONS_MESH) { + return RSG::canvas_render->get_pipeline_compilations(PIPELINE_SOURCE_MESH) + RSG::scene->get_pipeline_compilations(PIPELINE_SOURCE_MESH); + } else if (p_info == RENDERING_INFO_PIPELINE_COMPILATIONS_SURFACE) { + return RSG::scene->get_pipeline_compilations(PIPELINE_SOURCE_SURFACE); + } else if (p_info == RENDERING_INFO_PIPELINE_COMPILATIONS_DRAW) { + return RSG::canvas_render->get_pipeline_compilations(PIPELINE_SOURCE_DRAW) + RSG::scene->get_pipeline_compilations(PIPELINE_SOURCE_DRAW); + } else if (p_info == RENDERING_INFO_PIPELINE_COMPILATIONS_SPECIALIZATION) { + return RSG::canvas_render->get_pipeline_compilations(PIPELINE_SOURCE_SPECIALIZATION) + RSG::scene->get_pipeline_compilations(PIPELINE_SOURCE_SPECIALIZATION); } return RSG::utilities->get_rendering_info(p_info); } diff --git a/servers/rendering/rendering_server_default.h b/servers/rendering/rendering_server_default.h index 2dcdc3f2543d..1c70637c5e72 100644 --- a/servers/rendering/rendering_server_default.h +++ b/servers/rendering/rendering_server_default.h @@ -245,6 +245,26 @@ class RenderingServerDefault : public RenderingServer { FUNCRIDSPLIT(shader) + virtual RID shader_create_from_code(const String &p_code, const String &p_path_hint = String()) override { + RID shader = RSG::material_storage->shader_allocate(); + bool using_server_thread = Thread::get_caller_id() == server_thread; + if (using_server_thread || RSG::material_storage->can_create_resources_async()) { + if (using_server_thread) { + command_queue.flush_if_pending(); + } + + RSG::material_storage->shader_initialize(shader); + RSG::material_storage->shader_set_code(shader, p_code); + RSG::material_storage->shader_set_path_hint(shader, p_path_hint); + } else { + command_queue.push(RSG::material_storage, &RendererMaterialStorage::shader_initialize, shader); + command_queue.push(RSG::material_storage, &RendererMaterialStorage::shader_set_code, shader, p_code); + 
command_queue.push(RSG::material_storage, &RendererMaterialStorage::shader_set_path_hint, shader, p_path_hint); + } + + return shader; + } + FUNC2(shader_set_code, RID, const String &) FUNC2(shader_set_path_hint, RID, const String &) FUNC1RC(String, shader_get_code, RID) @@ -261,6 +281,28 @@ class RenderingServerDefault : public RenderingServer { FUNCRIDSPLIT(material) + virtual RID material_create_from_shader(RID p_next_pass, int p_render_priority, RID p_shader) override { + RID material = RSG::material_storage->material_allocate(); + bool using_server_thread = Thread::get_caller_id() == server_thread; + if (using_server_thread || RSG::material_storage->can_create_resources_async()) { + if (using_server_thread) { + command_queue.flush_if_pending(); + } + + RSG::material_storage->material_initialize(material); + RSG::material_storage->material_set_next_pass(material, p_next_pass); + RSG::material_storage->material_set_render_priority(material, p_render_priority); + RSG::material_storage->material_set_shader(material, p_shader); + } else { + command_queue.push(RSG::material_storage, &RendererMaterialStorage::material_initialize, material); + command_queue.push(RSG::material_storage, &RendererMaterialStorage::material_set_next_pass, material, p_next_pass); + command_queue.push(RSG::material_storage, &RendererMaterialStorage::material_set_render_priority, material, p_render_priority); + command_queue.push(RSG::material_storage, &RendererMaterialStorage::material_set_shader, material, p_shader); + } + + return material; + } + FUNC2(material_set_shader, RID, RID) FUNC3(material_set_param, RID, const StringName &, const Variant &) @@ -283,8 +325,9 @@ class RenderingServerDefault : public RenderingServer { // TODO once we have RSG::mesh_storage, add can_create_resources_async and call here instead of texture_storage!! 
- if (Thread::get_caller_id() == server_thread || RSG::texture_storage->can_create_resources_async()) { - if (Thread::get_caller_id() == server_thread) { + bool using_server_thread = Thread::get_caller_id() == server_thread; + if (using_server_thread || RSG::texture_storage->can_create_resources_async()) { + if (using_server_thread) { command_queue.flush_if_pending(); } RSG::mesh_storage->mesh_initialize(mesh); @@ -292,12 +335,14 @@ class RenderingServerDefault : public RenderingServer { for (int i = 0; i < p_surfaces.size(); i++) { RSG::mesh_storage->mesh_add_surface(mesh, p_surfaces[i]); } + RSG::scene->mesh_generate_pipelines(mesh, using_server_thread); } else { command_queue.push(RSG::mesh_storage, &RendererMeshStorage::mesh_initialize, mesh); command_queue.push(RSG::mesh_storage, &RendererMeshStorage::mesh_set_blend_shape_count, mesh, p_blend_shape_count); for (int i = 0; i < p_surfaces.size(); i++) { command_queue.push(RSG::mesh_storage, &RendererMeshStorage::mesh_add_surface, mesh, p_surfaces[i]); } + command_queue.push(RSG::scene, &RenderingMethod::mesh_generate_pipelines, mesh, true); } return mesh; diff --git a/servers/rendering/shader_compiler.cpp b/servers/rendering/shader_compiler.cpp index 3a0b9cf1580f..527a5e5725e4 100644 --- a/servers/rendering/shader_compiler.cpp +++ b/servers/rendering/shader_compiler.cpp @@ -1354,7 +1354,7 @@ String ShaderCompiler::_dump_node_code(const SL::Node *p_node, int p_level, Gene } code += ")"; if (is_screen_texture && !texture_func_returns_data && actions.apply_luminance_multiplier) { - code = "(" + code + " * vec4(vec3(sc_luminance_multiplier), 1.0))"; + code = "(" + code + " * vec4(vec3(sc_luminance_multiplier()), 1.0))"; } if (is_normal_roughness_texture && !texture_func_returns_data) { code = "normal_roughness_compatibility(" + code + ")"; diff --git a/servers/rendering/storage/material_storage.h b/servers/rendering/storage/material_storage.h index a5935cc90f72..b6c42cbeb235 100644 --- 
a/servers/rendering/storage/material_storage.h +++ b/servers/rendering/storage/material_storage.h @@ -38,6 +38,8 @@ class RendererMaterialStorage { public: virtual ~RendererMaterialStorage() {} + virtual bool can_create_resources_async() const = 0; + /* GLOBAL SHADER UNIFORM API */ virtual void global_shader_parameter_add(const StringName &p_name, RS::GlobalShaderParameterType p_type, const Variant &p_value) = 0; virtual void global_shader_parameter_remove(const StringName &p_name) = 0; diff --git a/servers/rendering_server.cpp b/servers/rendering_server.cpp index 0ad56961c0c7..083fc6851110 100644 --- a/servers/rendering_server.cpp +++ b/servers/rendering_server.cpp @@ -3447,6 +3447,18 @@ void RenderingServer::_bind_methods() { BIND_ENUM_CONSTANT(RENDERING_INFO_TEXTURE_MEM_USED); BIND_ENUM_CONSTANT(RENDERING_INFO_BUFFER_MEM_USED); BIND_ENUM_CONSTANT(RENDERING_INFO_VIDEO_MEM_USED); + BIND_ENUM_CONSTANT(RENDERING_INFO_PIPELINE_COMPILATIONS_CANVAS); + BIND_ENUM_CONSTANT(RENDERING_INFO_PIPELINE_COMPILATIONS_MESH); + BIND_ENUM_CONSTANT(RENDERING_INFO_PIPELINE_COMPILATIONS_SURFACE); + BIND_ENUM_CONSTANT(RENDERING_INFO_PIPELINE_COMPILATIONS_DRAW); + BIND_ENUM_CONSTANT(RENDERING_INFO_PIPELINE_COMPILATIONS_SPECIALIZATION); + + BIND_ENUM_CONSTANT(PIPELINE_SOURCE_CANVAS); + BIND_ENUM_CONSTANT(PIPELINE_SOURCE_MESH); + BIND_ENUM_CONSTANT(PIPELINE_SOURCE_SURFACE); + BIND_ENUM_CONSTANT(PIPELINE_SOURCE_DRAW); + BIND_ENUM_CONSTANT(PIPELINE_SOURCE_SPECIALIZATION); + BIND_ENUM_CONSTANT(PIPELINE_SOURCE_MAX); ADD_SIGNAL(MethodInfo("frame_pre_draw")); ADD_SIGNAL(MethodInfo("frame_post_draw")); diff --git a/servers/rendering_server.h b/servers/rendering_server.h index 878b02eaf136..0208a640a527 100644 --- a/servers/rendering_server.h +++ b/servers/rendering_server.h @@ -199,6 +199,17 @@ class RenderingServer : public Object { virtual RID texture_get_rd_texture(RID p_texture, bool p_srgb = false) const = 0; virtual uint64_t texture_get_native_handle(RID p_texture, bool p_srgb = false) 
const = 0; + /* PIPELINES API */ + + enum PipelineSource { + PIPELINE_SOURCE_CANVAS, + PIPELINE_SOURCE_MESH, + PIPELINE_SOURCE_SURFACE, + PIPELINE_SOURCE_DRAW, + PIPELINE_SOURCE_SPECIALIZATION, + PIPELINE_SOURCE_MAX + }; + /* SHADER API */ enum ShaderMode { @@ -211,6 +222,7 @@ class RenderingServer : public Object { }; virtual RID shader_create() = 0; + virtual RID shader_create_from_code(const String &p_code, const String &p_path_hint = String()) = 0; virtual void shader_set_code(RID p_shader, const String &p_code) = 0; virtual void shader_set_path_hint(RID p_shader, const String &p_path) = 0; @@ -242,6 +254,7 @@ class RenderingServer : public Object { }; virtual RID material_create() = 0; + virtual RID material_create_from_shader(RID p_next_pass, int p_render_priority, RID p_shader) = 0; virtual void material_set_shader(RID p_shader_material, RID p_shader) = 0; @@ -1697,6 +1710,11 @@ class RenderingServer : public Object { RENDERING_INFO_TEXTURE_MEM_USED, RENDERING_INFO_BUFFER_MEM_USED, RENDERING_INFO_VIDEO_MEM_USED, + RENDERING_INFO_PIPELINE_COMPILATIONS_CANVAS, + RENDERING_INFO_PIPELINE_COMPILATIONS_MESH, + RENDERING_INFO_PIPELINE_COMPILATIONS_SURFACE, + RENDERING_INFO_PIPELINE_COMPILATIONS_DRAW, + RENDERING_INFO_PIPELINE_COMPILATIONS_SPECIALIZATION, RENDERING_INFO_MAX }; @@ -1807,6 +1825,7 @@ class RenderingServer : public Object { VARIANT_ENUM_CAST(RenderingServer::TextureType); VARIANT_ENUM_CAST(RenderingServer::TextureLayeredType); VARIANT_ENUM_CAST(RenderingServer::CubeMapLayer); +VARIANT_ENUM_CAST(RenderingServer::PipelineSource); VARIANT_ENUM_CAST(RenderingServer::ShaderMode); VARIANT_ENUM_CAST(RenderingServer::ArrayType); VARIANT_BITFIELD_CAST(RenderingServer::ArrayFormat);