From fed4db12f4bd5f64c6fd527136c0b6c4150e1cc1 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Wed, 22 May 2024 16:24:52 +0200 Subject: [PATCH] [d3d12] get `num_workgroups` builtin working for indirect dispatches --- CHANGELOG.md | 4 ++ tests/tests/dispatch_workgroups_indirect.rs | 8 ++-- wgpu-core/src/device/resource.rs | 3 +- wgpu-core/src/indirect_validation.rs | 21 ++++++---- wgpu-hal/src/dx12/command.rs | 12 ++++-- wgpu-hal/src/dx12/device.rs | 45 +++++++++++++++++++++ wgpu-hal/src/dx12/mod.rs | 3 ++ 7 files changed, 79 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e2712c544..c4a8f77db0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,10 @@ Bottom level categories: - Print requested and supported usages on `UnsupportedUsage` error. By @VladasZ in [#6007](https://github.com/gfx-rs/wgpu/pull/6007) - Ensure safety of indirect dispatch. By @teoxoy in [#5714](https://github.com/gfx-rs/wgpu/pull/5714) +#### D3D12 + +- Get `num_workgroups` builtin working for indirect dispatches. By @teoxoy in [#5730](https://github.com/gfx-rs/wgpu/pull/5730) + ## 22.0.0 (2024-07-17) ### Overview diff --git a/tests/tests/dispatch_workgroups_indirect.rs b/tests/tests/dispatch_workgroups_indirect.rs index b37a1f2bc0..352c0ecfae 100644 --- a/tests/tests/dispatch_workgroups_indirect.rs +++ b/tests/tests/dispatch_workgroups_indirect.rs @@ -1,4 +1,4 @@ -use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext}; +use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext}; /// Make sure that the num_workgroups builtin works properly (it requires a workaround on D3D12). 
#[gpu_test] @@ -12,8 +12,7 @@ static NUM_WORKGROUPS_BUILTIN: GpuTestConfiguration = GpuTestConfiguration::new( .limits(wgpu::Limits { max_push_constant_size: 4, ..wgpu::Limits::downlevel_defaults() - }) - .expect_fail(FailureCase::backend(wgt::Backends::DX12)), + }), ) .run_async(|ctx| async move { let num_workgroups = [1, 2, 3]; @@ -34,8 +33,7 @@ static DISCARD_DISPATCH: GpuTestConfiguration = GpuTestConfiguration::new() max_compute_workgroups_per_dimension: 10, max_push_constant_size: 4, ..wgpu::Limits::downlevel_defaults() - }) - .expect_fail(FailureCase::backend(wgt::Backends::DX12)), + }), ) .run_async(|ctx| async move { let max = ctx.device.limits().max_compute_workgroups_per_dimension; diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 8ae84e11b4..5892e999ca 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -2622,7 +2622,8 @@ impl Device { let hal_desc = hal::PipelineLayoutDescriptor { label: desc.label.to_hal(self.instance_flags), - flags: hal::PipelineLayoutFlags::FIRST_VERTEX_INSTANCE, + flags: hal::PipelineLayoutFlags::FIRST_VERTEX_INSTANCE + | hal::PipelineLayoutFlags::NUM_WORK_GROUPS, bind_group_layouts: &raw_bind_group_layouts, push_constant_ranges: desc.push_constant_ranges.as_ref(), }; diff --git a/wgpu-core/src/indirect_validation.rs b/wgpu-core/src/indirect_validation.rs index 9a719e80a0..5db5106674 100644 --- a/wgpu-core/src/indirect_validation.rs +++ b/wgpu-core/src/indirect_validation.rs @@ -1,4 +1,4 @@ -use std::sync::atomic::AtomicBool; +use std::{num::NonZeroU64, sync::atomic::AtomicBool}; use thiserror::Error; @@ -63,7 +63,7 @@ impl IndirectValidation { let src = format!(" @group(0) @binding(0) - var<storage, read_write> dst: array<u32, 3>; + var<storage, read_write> dst: array<u32, 6>; @group(1) @binding(0) var<storage, read> src: array<u32>; struct OffsetPc {{ @@ -78,6 +78,9 @@ impl IndirectValidation { dst[0] = res.x; dst[1] = res.y; dst[2] = res.z; + dst[3] = res.x; + dst[4] = res.y; + dst[5] = res.z; }} "); @@ -123,6 +126,8 @@ impl 
IndirectValidation { } })?; + const DST_BUFFER_SIZE: NonZeroU64 = unsafe { NonZeroU64::new_unchecked(4 * 3 * 2) }; + let dst_bind_group_layout_desc = hal::BindGroupLayoutDescriptor { label: None, flags: hal::BindGroupLayoutFlags::empty(), @@ -132,7 +137,7 @@ impl IndirectValidation { ty: wgt::BindingType::Buffer { ty: wgt::BufferBindingType::Storage { read_only: false }, has_dynamic_offset: false, - min_binding_size: Some(std::num::NonZeroU64::new(4 * 3).unwrap()), + min_binding_size: Some(DST_BUFFER_SIZE), }, count: None, }], @@ -152,7 +157,7 @@ impl IndirectValidation { ty: wgt::BindingType::Buffer { ty: wgt::BufferBindingType::Storage { read_only: true }, has_dynamic_offset: true, - min_binding_size: Some(std::num::NonZeroU64::new(4 * 3).unwrap()), + min_binding_size: Some(NonZeroU64::new(4 * 3).unwrap()), }, count: None, }], @@ -204,7 +209,7 @@ impl IndirectValidation { let dst_buffer_desc = hal::BufferDescriptor { label: None, - size: 4 * 3, + size: DST_BUFFER_SIZE.get(), usage: hal::BufferUses::INDIRECT | hal::BufferUses::STORAGE_READ_WRITE, memory_flags: hal::MemoryFlags::empty(), }; @@ -224,7 +229,7 @@ impl IndirectValidation { buffers: &[hal::BufferBinding { buffer: &dst_buffer_0, offset: 0, - size: Some(std::num::NonZeroU64::new(4 * 3).unwrap()), + size: Some(DST_BUFFER_SIZE), }], samplers: &[], textures: &[], @@ -247,7 +252,7 @@ impl IndirectValidation { buffers: &[hal::BufferBinding { buffer: &dst_buffer_1, offset: 0, - size: Some(std::num::NonZeroU64::new(4 * 3).unwrap()), + size: Some(DST_BUFFER_SIZE), }], samplers: &[], textures: &[], @@ -292,7 +297,7 @@ impl IndirectValidation { buffers: &[hal::BufferBinding { buffer, offset: 0, - size: Some(std::num::NonZeroU64::new(binding_size).unwrap()), + size: Some(NonZeroU64::new(binding_size).unwrap()), }], samplers: &[], textures: &[], diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 3c535b2234..87f8a5f961 100644 --- a/wgpu-hal/src/dx12/command.rs +++ 
b/wgpu-hal/src/dx12/command.rs @@ -1193,11 +1193,17 @@ impl crate::CommandEncoder for super::CommandEncoder { self.list.as_ref().unwrap().dispatch(count); } unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { - self.prepare_dispatch([0; 3]); - //TODO: update special constants indirectly + self.update_root_elements(); + let cmd_signature = if let Some(cmd_signatures) = + self.pass.layout.special_constants_cmd_signatures.as_mut() + { + cmd_signatures.dispatch.as_mut_ptr() + } else { + self.shared.cmd_signatures.dispatch.as_mut_ptr() + }; unsafe { self.list.as_ref().unwrap().ExecuteIndirect( - self.shared.cmd_signatures.dispatch.as_mut_ptr(), + cmd_signature, 1, buffer.resource.as_mut_ptr(), offset, diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index e886e2fd04..c27b5be72e 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1105,6 +1105,50 @@ impl crate::Device for super::Device { .create_root_signature(blob, 0) .into_device_result("Root signature creation")?; + let special_constants_cmd_signatures = + if let Some(root_index) = special_constants_root_index { + Some(super::CommandSignatures { + draw: self + .raw + .create_command_signature( + raw.clone(), + &[ + d3d12::IndirectArgument::constant(root_index, 0, 3), + d3d12::IndirectArgument::draw(), + ], + 12 + mem::size_of::() as u32, + 0, + ) + .into_device_result("Command (draw) signature creation")?, + draw_indexed: self + .raw + .create_command_signature( + raw.clone(), + &[ + d3d12::IndirectArgument::constant(root_index, 0, 3), + d3d12::IndirectArgument::draw_indexed(), + ], + 12 + mem::size_of::() as u32, + 0, + ) + .into_device_result("Command (draw_indexed) signature creation")?, + dispatch: self + .raw + .create_command_signature( + raw.clone(), + &[ + d3d12::IndirectArgument::constant(root_index, 0, 3), + d3d12::IndirectArgument::dispatch(), + ], + 12 + mem::size_of::() as u32, + 0, + ) + .into_device_result("Command 
(dispatch) signature creation")?, + }) + } else { + None + }; + log::debug!("\traw = {:?}", raw); if let Some(label) = desc.label { @@ -1119,6 +1163,7 @@ impl crate::Device for super::Device { signature: raw, total_root_elements: parameters.len() as super::RootIndex, special_constants_root_index, + special_constants_cmd_signatures, root_constant_info, }, bind_group_infos, diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 0bb7adc75e..c3b10a2cce 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -227,6 +227,7 @@ struct Idler { event: d3d12::Event, } +#[derive(Debug, Clone)] struct CommandSignatures { draw: d3d12::CommandSignature, draw_indexed: d3d12::CommandSignature, @@ -345,6 +346,7 @@ impl PassState { signature: d3d12::RootSignature::null(), total_root_elements: 0, special_constants_root_index: None, + special_constants_cmd_signatures: None, root_constant_info: None, }, root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS], @@ -556,6 +558,7 @@ struct PipelineLayoutShared { signature: d3d12::RootSignature, total_root_elements: RootIndex, special_constants_root_index: Option<RootIndex>, + special_constants_cmd_signatures: Option<CommandSignatures>, root_constant_info: Option<RootConstantInfo>, }