From 65920e33daf18a661368a2443e9695e86073ea3a Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Mon, 2 Sep 2024 14:27:36 -0700 Subject: [PATCH] wip --- wgpu-core/src/binding_model.rs | 10 ++++++ wgpu-core/src/command/clear.rs | 2 +- wgpu-core/src/device/resource.rs | 17 +++++++-- wgpu-hal/src/dx12/adapter.rs | 3 ++ wgpu-hal/src/gles/adapter.rs | 10 ++++++ wgpu-hal/src/lib.rs | 61 ++++++++++++++++++++++++++++++++ wgpu-hal/src/metal/adapter.rs | 4 +++ wgpu-hal/src/vulkan/adapter.rs | 27 +++++++++++--- wgpu-hal/src/vulkan/mod.rs | 24 +++++++++++++ wgpu-types/src/lib.rs | 10 ++++-- 10 files changed, 159 insertions(+), 9 deletions(-) diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs index d8a8b32d2f..bd89fbb2b0 100644 --- a/wgpu-core/src/binding_model.rs +++ b/wgpu-core/src/binding_model.rs @@ -884,6 +884,16 @@ pub(crate) fn buffer_binding_type_alignment( } } +pub(crate) fn buffer_binding_type_bounds_check_alignment( + alignments: &hal::Alignments, + binding_type: wgt::BufferBindingType, +) -> wgt::BufferAddress { + match binding_type { + wgt::BufferBindingType::Uniform => alignments.uniform_bounds_check_alignment.get(), + wgt::BufferBindingType::Storage { .. } => wgt::COPY_BUFFER_ALIGNMENT, + } +} + #[derive(Debug)] pub struct BindGroup { pub(crate) raw: Snatchable>, diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs index 944dd40af4..07dbfec95c 100644 --- a/wgpu-core/src/command/clear.rs +++ b/wgpu-core/src/command/clear.rs @@ -371,7 +371,7 @@ fn clear_texture_via_buffer_copies( for mip_level in range.mip_range { let mut mip_size = texture_desc.mip_level_size(mip_level).unwrap(); // Round to multiple of block size - mip_size.width = align_to(mip_size.width, block_width); + mip_size.width = alignxo_to(mip_size.width, block_width); mip_size.height = align_to(mip_size.height, block_height); let bytes_per_row = align_to( diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 9f10104ee7..9cdf838299 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -39,7 +39,7 @@ use once_cell::sync::OnceCell; use smallvec::SmallVec; use thiserror::Error; -use wgt::{DeviceLostReason, TextureFormat, TextureSampleType, TextureViewDimension}; +use wgt::{DeviceLostReason, TextureFormat, TextureSampleType, TextureViewDimension, math::align_to}; use std::{ borrow::Cow, @@ -2004,10 +2004,23 @@ impl Device { late_buffer_binding_sizes.insert(binding, late_size); } + // This was checked against the device's alignment requirements above, + // which should always be a multiple of `COPY_BUFFER_ALIGNMENT`. assert_eq!(bb.offset % wgt::COPY_BUFFER_ALIGNMENT, 0); + + // `wgpu_hal` only restricts shader access to bound buffer regions with + // a certain resolution. For the sake of lazy initialization, round up + // the size of the bound range to reflect how much of the buffer is + // actually going to be visible to the shader. + let bounds_check_alignment = binding_model::buffer_binding_type_bounds_check_alignment( + &self.alignments, + binding_ty, + ); + let visible_size = align_to(bind_size, bounds_check_alignment); + used_buffer_ranges.extend(buffer.initialization_status.read().create_action( buffer, - bb.offset..bb.offset + bind_size, + bb.offset..bb.offset + visible_size, MemoryInitKind::NeedsInitializedMemory, )); diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 00930024ab..b2b710941f 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -524,6 +524,9 @@ impl super::Adapter { Direct3D12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT as u64, ) .unwrap(), + // Direct3D correctly bounds-checks all array accesses: + // https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#18.6.8.2%20Device%20Memory%20Reads + uniform_bounds_check_alignment: wgt::BufferSize::new(1).unwrap(), }, downlevel, }, diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index e7ecacebe0..de3c0e903e 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -841,6 +841,16 @@ impl super::Adapter { alignments: crate::Alignments { buffer_copy_offset: wgt::BufferSize::new(4).unwrap(), buffer_copy_pitch: wgt::BufferSize::new(4).unwrap(), + // #6151: `wgpu_hal::gles` doesn't ask Naga to inject bounds + // checks in GLSL, and it doesn't request extensions like + // `KHR_robust_buffer_access_behavior` that would provide + // them, so we can't really implement the checks promised by + // [`crate::BufferBinding`]. + // + // Since this is a pre-existing condition, for the time + // being, provide 1 as the value here, to cause as little + // trouble as possible. + uniform_bounds_check_alignment: wgt::BufferSize::new(1).unwrap(), }, }, }) diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 92afa71d43..04638d6b4c 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1635,9 +1635,25 @@ pub struct InstanceDescriptor<'a> { pub struct Alignments { /// The alignment of the start of the buffer used as a GPU copy source. pub buffer_copy_offset: wgt::BufferSize, + /// The alignment of the row pitch of the texture data stored in a buffer that is /// used in a GPU copy operation. pub buffer_copy_pitch: wgt::BufferSize, + + /// The finest alignment of bound range checking for uniform buffers. + /// + /// As explained in the documentation for [`BufferBinding`], when `wgpu_hal` + /// restricts shader references to the accessible region of a [`Uniform`] + /// buffer, the size of the accessible region is the bind group binding's + /// stated [size], rounded up to the next multiple of this value. + /// + /// We don't need an analogous field for storage buffers, because Vulkan and + /// all other platforms promise at most four-byte alignment, so `wgpu_hal` + /// always rounds up the size by that much. + /// + /// [`Uniform`]: wgt::BufferBindingType::Uniform + /// [size]: BufferBinding::size + pub uniform_bounds_check_alignment: wgt::BufferSize, } #[derive(Clone, Debug)] @@ -1807,6 +1823,33 @@ pub struct PipelineLayoutDescriptor<'a, B: DynBindGroupLayout + ?Sized> { pub push_constant_ranges: &'a [wgt::PushConstantRange], } +/// A region of a buffer made visible to shaders via a [`BindGroup`]. +/// +/// `wgpu_hal` guarantees that shaders compiled with +/// [`ShaderModuleDescriptor::runtime_checks`] set to `true` cannot read or +/// write data via this binding outside the *accessible region* of [`buffer`]: +/// +/// - The accessible region starts at [`offset`]. +/// +/// - For [`Storage`] bindings, the size of the accessible region is [`size`], +/// which must be a multiple of 4. +/// +/// - For [`Uniform`] bindings, the size of the accessible region is [`size`] +/// rounded up to the next multiple of +/// [`Alignments::uniform_bounds_check_alignment`]. +/// +/// Note that this guarantee is stricter than WGSL's requirements for +/// [out-of-bounds accesses][woob], as WGSL allows them to return values from +/// elsewhere in the buffer. But this guarantee is necessary anyway, to +/// permit `wgpu-core` to use lazy buffer initialization. +/// +/// [`BindGroup`]: Api::BindGroup +/// [`buffer`]: BufferBinding::buffer +/// [`offset`]: BufferBinding::offset +/// [`size`]: BufferBinding::size +/// [`Storage`]: wgt::BufferBindingType::Storage +/// [`Uniform`]: wgt::BufferBindingType::Uniform +/// [woob]: https://gpuweb.github.io/gpuweb/wgsl/#out-of-bounds-access-sec #[derive(Debug)] pub struct BufferBinding<'a, B: DynBuffer + ?Sized> { /// The buffer being bound. @@ -1925,6 +1968,24 @@ pub enum ShaderInput<'a> { pub struct ShaderModuleDescriptor<'a> { pub label: Label<'a>, + + /// Enforce bounds checks in shaders, even if the underlying driver doesn't + /// support doing so natively. + /// + /// When this is `true`, `wgpu_hal` promises that shaders can only read or + /// write the *accessible region* of a bindgroup's buffer bindings, as + /// explained in the documentation for [`BufferBinding`]. If the underlying + /// graphics platform cannot implement these bounds checks itself, + /// `wgpu_hal` will inject bounds checks before presenting the shader to the + /// platform. + /// + /// When this is `false`, `wgpu_hal` only enforces such bounds checks if the + /// underlying platform provides a way to do so itself. + /// + /// Note that `wgpu_hal` users may try to initialize only those portions of + /// buffers that they anticipate might be read from. Passing `false` here + /// may allow shaders to see wider regions of the buffers than expected, + /// making such deferred initialization visible to the application. pub runtime_checks: bool, } diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 5ef6d358b8..47642079a0 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -997,6 +997,10 @@ impl super::PrivateCapabilities { alignments: crate::Alignments { buffer_copy_offset: wgt::BufferSize::new(self.buffer_alignment).unwrap(), buffer_copy_pitch: wgt::BufferSize::new(4).unwrap(), + // This backend has Naga incorporate bounds checks into the + // Metal Shading Language it generates, so from `wgpu_hal`'s + // users' point of view, references are tightly checked. + uniform_bounds_check_alignment: wgt::BufferSize::new(1), }, downlevel, } diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 25d0d75049..18a5d4aeb9 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -342,9 +342,6 @@ impl PhysicalDeviceFeatures { None }, robustness2: if enabled_extensions.contains(&ext::robustness2::NAME) { - // Note: enabling `robust_buffer_access2` isn't requires, strictly speaking - // since we can enable `robust_buffer_access` all the time. But it improves - // program portability, so we opt into it if they are supported. Some( vk::PhysicalDeviceRobustness2FeaturesEXT::default() .robust_buffer_access2(private_caps.robust_buffer_access2) @@ -842,6 +839,10 @@ pub struct PhysicalDeviceProperties { /// `VK_EXT_subgroup_size_control` extension, promoted to Vulkan 1.3. subgroup_size_control: Option>, + /// Additional `vk::PhysicalDevice` properties from the + /// `VK_EXT_robustness2` extension. + robustness2: Option>, + /// The device API version. /// /// Which is the version of Vulkan supported for device-level functionality. @@ -1104,6 +1105,15 @@ impl PhysicalDeviceProperties { .unwrap(), buffer_copy_pitch: wgt::BufferSize::new(limits.optimal_buffer_copy_row_pitch_alignment) .unwrap(), + uniform_bounds_check_alignment: { + let alignment = if let Some(ref robustness2) = self.robustness2 { + robustness2.robust_uniform_buffer_access_size_alignment + } else { + // Naga-injected bounds checks are precise. + 1 + }; + wgt::BufferSize::new(alignment).unwrap() + }, } } } @@ -1133,6 +1143,7 @@ impl super::InstanceShared { let supports_subgroup_size_control = capabilities.device_api_version >= vk::API_VERSION_1_3 || capabilities.supports_extension(ext::subgroup_size_control::NAME); + let supports_robustness2 = capabilities.supports_extension(ext::robustness2::NAME); let supports_acceleration_structure = capabilities.supports_extension(khr::acceleration_structure::NAME); @@ -1180,6 +1191,13 @@ impl super::InstanceShared { properties2 = properties2.push_next(next); } + if supports_robustness2 { + let next = capabilities + .robustness2 + .insert(vk::PhysicalDeviceRobustness2PropertiesEXT::default()); + properties2 = properties2.push_next(next); + } + unsafe { get_device_properties.get_physical_device_properties2(phd, &mut properties2) }; @@ -1191,6 +1209,7 @@ impl super::InstanceShared { capabilities .supported_extensions .retain(|&x| x.extension_name_as_c_str() != Ok(ext::robustness2::NAME)); + capabilities.robustness2 = None; } }; capabilities @@ -1779,7 +1798,7 @@ impl super::Adapter { capabilities: Some(capabilities.iter().cloned().collect()), bounds_check_policies: naga::proc::BoundsCheckPolicies { index: naga::proc::BoundsCheckPolicy::Restrict, - buffer: if self.private_caps.robust_buffer_access { + buffer: if self.private_caps.robust_buffer_access2 { naga::proc::BoundsCheckPolicy::Unchecked } else { naga::proc::BoundsCheckPolicy::Restrict diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 0ee90c4590..89537300c5 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -493,9 +493,33 @@ struct PrivateCapabilities { /// Ability to present contents to any screen. Only needed to work around broken platform configurations. can_present: bool, non_coherent_map_mask: wgt::BufferAddress, + + /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature. + /// + /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement + /// `wgpu_hal`'s guarantees. As explained in the documentation for + /// [`BufferBinding`], shaders must not be able to access buffer contents + /// via a given bindgroup binding beyond that binding's *accessible region*. + /// Enabling `robustBufferAccess` does ensure that out-of-bounds reads and + /// writes are not undefined behavior (that's good), but still permits + /// out-of-bounds reads to return data from anywhere within the buffer, not + /// just the accessible region. + /// + /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess + /// [`BufferBinding`]: crate::BufferBinding robust_buffer_access: bool, + robust_image_access: bool, + + /// True if this adapter supports the [`VK_EXT_robustness2`] extension's + /// `robustBufferAccess2` feature. + /// + /// This is sufficient to implement `wgpu_hal`'s required bounds-checking of + /// shader accesses to buffer contents. If this feature is not available, + /// this backend must have Naga inject bounds checks in the generated + /// SPIR-V. robust_buffer_access2: bool, + robust_image_access2: bool, zero_initialize_workgroup_memory: bool, image_format_list: bool, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index cd2f5dcd0a..1983dc3181 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -7309,8 +7309,14 @@ impl ShaderBoundChecks { /// Creates a new configuration where the shader isn't bound checked. /// /// # Safety - /// The caller MUST ensure that all shaders built with this configuration don't perform any - /// out of bounds reads or writes. + /// + /// The caller MUST ensure that all shaders built with this configuration + /// don't perform any out of bounds reads or writes. `wgpu_core`, in + /// particular, initializes only those portions of buffers that it expects + /// might be read, and it does not expect contents outside the ranges bound + /// in bindgroups to be accessible, so using this configuration with + /// ill-behaved shaders could expose uninitialized GPU memory contents to + /// the application. #[must_use] pub unsafe fn unchecked() -> Self { ShaderBoundChecks {