Skip to content

Commit

Permalink
Move transitions of textures initialized by transfer workers to the main graphics queue.
Browse files Browse the repository at this point in the history

Also adds a new possible texture layout and API trait to support a particular behavior in D3D12 where only the COMMON layout is supported in copy queues. Fixes godotengine#98158.
  • Loading branch information
DarioSamo committed Oct 17, 2024
1 parent 7a936e8 commit b5c7d28
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 12 deletions.
4 changes: 4 additions & 0 deletions drivers/d3d12/rendering_device_driver_d3d12.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2003,6 +2003,8 @@ static D3D12_BARRIER_LAYOUT _rd_texture_layout_to_d3d12_barrier_layout(RDD::Text
switch (p_texture_layout) {
case RDD::TEXTURE_LAYOUT_UNDEFINED:
return D3D12_BARRIER_LAYOUT_UNDEFINED;
case RDD::TEXTURE_LAYOUT_GENERAL:
return D3D12_BARRIER_LAYOUT_COMMON;
case RDD::TEXTURE_LAYOUT_STORAGE_OPTIMAL:
return D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS;
case RDD::TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
Expand Down Expand Up @@ -6175,6 +6177,8 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) {
return false;
case API_TRAIT_CLEARS_WITH_COPY_ENGINE:
return false;
case API_TRAIT_USE_GENERAL_IN_COPY_QUEUES:
return true;
default:
return RenderingDeviceDriver::api_trait_get(p_trait);
}
Expand Down
1 change: 1 addition & 0 deletions drivers/vulkan/rendering_device_driver_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ static const VkFormat RD_TO_VK_FORMAT[RDD::DATA_FORMAT_MAX] = {

static VkImageLayout RD_TO_VK_LAYOUT[RDD::TEXTURE_LAYOUT_MAX] = {
VK_IMAGE_LAYOUT_UNDEFINED, // TEXTURE_LAYOUT_UNDEFINED
VK_IMAGE_LAYOUT_GENERAL, // TEXTURE_LAYOUT_GENERAL
VK_IMAGE_LAYOUT_GENERAL, // TEXTURE_LAYOUT_STORAGE_OPTIMAL
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
Expand Down
46 changes: 35 additions & 11 deletions servers/rendering/rendering_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1243,6 +1243,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
TransferWorker *transfer_worker = nullptr;
const uint8_t *read_ptr = p_data.ptr();
uint8_t *write_ptr = nullptr;
const RDD::TextureLayout copy_dst_layout = driver->api_trait_get(RDD::API_TRAIT_USE_GENERAL_IN_COPY_QUEUES) ? RDD::TEXTURE_LAYOUT_GENERAL : RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
for (uint32_t pass = 0; pass < 2; pass++) {
const bool copy_pass = (pass == 1);
if (copy_pass) {
Expand All @@ -1267,7 +1268,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
tb.texture = texture->driver_id;
tb.dst_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
tb.prev_layout = RDD::TEXTURE_LAYOUT_UNDEFINED;
tb.next_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
tb.next_layout = copy_dst_layout;
tb.subresources.aspect = texture->barrier_aspect_flags;
tb.subresources.mipmap_count = texture->mipmaps;
tb.subresources.base_layer = p_layer;
Expand Down Expand Up @@ -1313,7 +1314,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
copy_region.texture_subresources.layer_count = 1;
copy_region.texture_offset = Vector3i(0, 0, z);
copy_region.texture_region_size = Vector3i(logic_width, logic_height, 1);
driver->command_copy_buffer_to_texture(transfer_worker->command_buffer, transfer_worker->staging_buffer, texture->driver_id, RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL, copy_region);
driver->command_copy_buffer_to_texture(transfer_worker->command_buffer, transfer_worker->staging_buffer, texture->driver_id, copy_dst_layout, copy_region);
}

staging_local_offset += to_allocate;
Expand All @@ -1332,14 +1333,13 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons
RDD::TextureBarrier tb;
tb.texture = texture->driver_id;
tb.src_access = RDD::BARRIER_ACCESS_COPY_WRITE_BIT;
tb.prev_layout = RDD::TEXTURE_LAYOUT_COPY_DST_OPTIMAL;
tb.prev_layout = copy_dst_layout;
tb.next_layout = RDD::TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
tb.subresources.aspect = texture->barrier_aspect_flags;
tb.subresources.mipmap_count = texture->mipmaps;
tb.subresources.base_layer = p_layer;
tb.subresources.layer_count = 1;

driver->command_pipeline_barrier(transfer_worker->command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, {}, {}, tb);
transfer_worker->texture_barriers.push_back(tb);
}

_release_transfer_worker(transfer_worker);
Expand Down Expand Up @@ -5152,6 +5152,21 @@ void RenderingDevice::_wait_for_transfer_worker(TransferWorker *p_transfer_worke
MutexLock lock(p_transfer_worker->operations_mutex);
p_transfer_worker->operations_processed = p_transfer_worker->operations_submitted;
}

if (!p_transfer_worker->texture_barriers.is_empty()) {
MutexLock transfer_worker_lock(transfer_worker_pool_mutex);
_flush_barriers_for_transfer_worker(p_transfer_worker);
}
}

// Moves every texture barrier queued on the given transfer worker into the
// pool-wide list (transfer_worker_pool_texture_barriers), then empties the
// worker's own list. Does nothing when the worker has no pending barriers.
// NOTE(review): the visible callers hold transfer_worker_pool_mutex while calling
// this — presumably required because the shared pool list is modified; confirm.
void RenderingDevice::_flush_barriers_for_transfer_worker(TransferWorker *p_transfer_worker) {
	if (p_transfer_worker->texture_barriers.is_empty()) {
		return;
	}

	// Appending to the pool list never touches the worker's list, so its size is loop-invariant.
	const uint32_t barrier_count = p_transfer_worker->texture_barriers.size();
	for (uint32_t barrier_index = 0; barrier_index < barrier_count; barrier_index++) {
		transfer_worker_pool_texture_barriers.push_back(p_transfer_worker->texture_barriers[barrier_index]);
	}

	p_transfer_worker->texture_barriers.clear();
}

void RenderingDevice::_check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation) {
Expand Down Expand Up @@ -5193,11 +5208,11 @@ void RenderingDevice::_check_transfer_worker_index_array(IndexArray *p_index_arr
}
}

void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) {
void RenderingDevice::_submit_transfer_workers(RDD::CommandBufferID p_draw_command_buffer) {
MutexLock transfer_worker_lock(transfer_worker_pool_mutex);
for (uint32_t i = 0; i < transfer_worker_pool.size(); i++) {
TransferWorker *worker = transfer_worker_pool[i];
if (p_operations_used_by_draw) {
if (p_draw_command_buffer) {
MutexLock lock(worker->operations_mutex);
if (worker->operations_processed >= transfer_worker_operation_used_by_draw[worker->index]) {
// The operation used by the draw has already been processed, we don't need to wait on the worker.
Expand All @@ -5208,12 +5223,21 @@ void RenderingDevice::_submit_transfer_workers(bool p_operations_used_by_draw) {
{
MutexLock lock(worker->thread_mutex);
if (worker->recording) {
VectorView<RDD::SemaphoreID> semaphores = p_operations_used_by_draw ? frames[frame].transfer_worker_semaphores[i] : VectorView<RDD::SemaphoreID>();
VectorView<RDD::SemaphoreID> semaphores = p_draw_command_buffer ? frames[frame].transfer_worker_semaphores[i] : VectorView<RDD::SemaphoreID>();
_end_transfer_worker(worker);
_submit_transfer_worker(worker, semaphores);
}

if (p_draw_command_buffer) {
_flush_barriers_for_transfer_worker(worker);
}
}
}

if (p_draw_command_buffer && !transfer_worker_pool_texture_barriers.is_empty()) {
driver->command_pipeline_barrier(p_draw_command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, {}, {}, transfer_worker_pool_texture_barriers);
transfer_worker_pool_texture_barriers.clear();
}
}

void RenderingDevice::_wait_for_transfer_workers() {
Expand Down Expand Up @@ -5807,10 +5831,10 @@ void RenderingDevice::_end_frame() {
ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work).");
}

_submit_transfer_workers(true);

// The command buffer must be copied into a stack variable as the driver workarounds can change the command buffer in use.
RDD::CommandBufferID command_buffer = frames[frame].command_buffer;
_submit_transfer_workers(command_buffer);

draw_graph.end(RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS, command_buffer, frames[frame].command_buffer_pool);
driver->command_buffer_end(command_buffer);
driver->end_segment();
Expand Down Expand Up @@ -6387,7 +6411,7 @@ void RenderingDevice::finalize() {
}

// Wait for transfer workers to finish.
_submit_transfer_workers(false);
_submit_transfer_workers();
_wait_for_transfer_workers();

// Delete everything the graph has created.
Expand Down
5 changes: 4 additions & 1 deletion servers/rendering/rendering_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -1267,6 +1267,7 @@ class RenderingDevice : public RenderingDeviceCommons {
RDD::CommandBufferID command_buffer;
RDD::CommandPoolID command_pool;
RDD::FenceID command_fence;
LocalVector<RDD::TextureBarrier> texture_barriers;
bool recording = false;
bool submitted = false;
BinaryMutex thread_mutex;
Expand All @@ -1280,6 +1281,7 @@ class RenderingDevice : public RenderingDeviceCommons {
uint32_t transfer_worker_pool_max_size = 1;
LocalVector<uint64_t> transfer_worker_operation_used_by_draw;
LocalVector<uint32_t> transfer_worker_pool_available_list;
LocalVector<RDD::TextureBarrier> transfer_worker_pool_texture_barriers;
BinaryMutex transfer_worker_pool_mutex;
ConditionVariable transfer_worker_pool_condition;

Expand All @@ -1288,12 +1290,13 @@ class RenderingDevice : public RenderingDeviceCommons {
void _end_transfer_worker(TransferWorker *p_transfer_worker);
void _submit_transfer_worker(TransferWorker *p_transfer_worker, VectorView<RDD::SemaphoreID> p_signal_semaphores = VectorView<RDD::SemaphoreID>());
void _wait_for_transfer_worker(TransferWorker *p_transfer_worker);
void _flush_barriers_for_transfer_worker(TransferWorker *p_transfer_worker);
void _check_transfer_worker_operation(uint32_t p_transfer_worker_index, uint64_t p_transfer_worker_operation);
void _check_transfer_worker_buffer(Buffer *p_buffer);
void _check_transfer_worker_texture(Texture *p_texture);
void _check_transfer_worker_vertex_array(VertexArray *p_vertex_array);
void _check_transfer_worker_index_array(IndexArray *p_index_array);
void _submit_transfer_workers(bool p_operations_used_by_draw);
void _submit_transfer_workers(RDD::CommandBufferID p_draw_command_buffer = RDD::CommandBufferID());
void _wait_for_transfer_workers();
void _free_transfer_workers();

Expand Down
2 changes: 2 additions & 0 deletions servers/rendering/rendering_device_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,8 @@ uint64_t RenderingDeviceDriver::api_trait_get(ApiTrait p_trait) {
return 1;
case API_TRAIT_CLEARS_WITH_COPY_ENGINE:
return true;
case API_TRAIT_USE_GENERAL_IN_COPY_QUEUES:
return false;
default:
ERR_FAIL_V(0);
}
Expand Down
2 changes: 2 additions & 0 deletions servers/rendering/rendering_device_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ class RenderingDeviceDriver : public RenderingDeviceCommons {

enum TextureLayout {
TEXTURE_LAYOUT_UNDEFINED,
TEXTURE_LAYOUT_GENERAL,
TEXTURE_LAYOUT_STORAGE_OPTIMAL,
TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
Expand Down Expand Up @@ -750,6 +751,7 @@ class RenderingDeviceDriver : public RenderingDeviceCommons {
API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP,
API_TRAIT_SECONDARY_VIEWPORT_SCISSOR,
API_TRAIT_CLEARS_WITH_COPY_ENGINE,
API_TRAIT_USE_GENERAL_IN_COPY_QUEUES,
};

enum ShaderChangeInvalidation {
Expand Down

0 comments on commit b5c7d28

Please sign in to comment.