From 65920e33daf18a661368a2443e9695e86073ea3a Mon Sep 17 00:00:00 2001
From: Jim Blandy <jimb@red-bean.com>
Date: Mon, 2 Sep 2024 14:27:36 -0700
Subject: [PATCH] wip

---
 wgpu-core/src/binding_model.rs   | 10 ++++++
 wgpu-core/src/command/clear.rs   |  2 +-
 wgpu-core/src/device/resource.rs | 17 +++++++--
 wgpu-hal/src/dx12/adapter.rs     |  3 ++
 wgpu-hal/src/gles/adapter.rs     | 10 ++++++
 wgpu-hal/src/lib.rs              | 61 ++++++++++++++++++++++++++++++++
 wgpu-hal/src/metal/adapter.rs    |  4 +++
 wgpu-hal/src/vulkan/adapter.rs   | 27 +++++++++++---
 wgpu-hal/src/vulkan/mod.rs       | 24 +++++++++++++
 wgpu-types/src/lib.rs            | 10 ++++--
 10 files changed, 159 insertions(+), 9 deletions(-)
diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs
index d8a8b32d2f..bd89fbb2b0 100644
--- a/wgpu-core/src/binding_model.rs
+++ b/wgpu-core/src/binding_model.rs
@@ -884,6 +884,16 @@ pub(crate) fn buffer_binding_type_alignment(
     }
 }
 
+pub(crate) fn buffer_binding_type_bounds_check_alignment(
+    alignments: &hal::Alignments,
+    binding_type: wgt::BufferBindingType,
+) -> wgt::BufferAddress {
+    match binding_type {
+        wgt::BufferBindingType::Uniform => alignments.uniform_bounds_check_alignment.get(),
+        wgt::BufferBindingType::Storage { .. } => wgt::COPY_BUFFER_ALIGNMENT,
+    }
+}
+
 #[derive(Debug)]
 pub struct BindGroup {
     pub(crate) raw: Snatchable<Box<dyn hal::DynBindGroup>>,
diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs
index 944dd40af4..07dbfec95c 100644
--- a/wgpu-core/src/command/clear.rs
+++ b/wgpu-core/src/command/clear.rs
@@ -371,7 +371,7 @@ fn clear_texture_via_buffer_copies(
     for mip_level in range.mip_range {
         let mut mip_size = texture_desc.mip_level_size(mip_level).unwrap();
         // Round to multiple of block size
-        mip_size.width = align_to(mip_size.width, block_width);
+        mip_size.width = alignxo_to(mip_size.width, block_width);
         mip_size.height = align_to(mip_size.height, block_height);
 
         let bytes_per_row = align_to(
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 9f10104ee7..9cdf838299 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -39,7 +39,7 @@ use once_cell::sync::OnceCell;
 
 use smallvec::SmallVec;
 use thiserror::Error;
-use wgt::{DeviceLostReason, TextureFormat, TextureSampleType, TextureViewDimension};
+use wgt::{DeviceLostReason, TextureFormat, TextureSampleType, TextureViewDimension, math::align_to};
 
 use std::{
     borrow::Cow,
@@ -2004,10 +2004,23 @@ impl Device {
             late_buffer_binding_sizes.insert(binding, late_size);
         }
 
+        // This was checked against the device's alignment requirements above,
+        // which should always be a multiple of `COPY_BUFFER_ALIGNMENT`.
         assert_eq!(bb.offset % wgt::COPY_BUFFER_ALIGNMENT, 0);
+
+        // `wgpu_hal` only restricts shader access to bound buffer regions with
+        // a certain resolution. For the sake of lazy initialization, round up
+        // the size of the bound range to reflect how much of the buffer is
+        // actually going to be visible to the shader.
+        let bounds_check_alignment = binding_model::buffer_binding_type_bounds_check_alignment(
+            &self.alignments,
+            binding_ty,
+        );
+        let visible_size = align_to(bind_size, bounds_check_alignment);
+        
         used_buffer_ranges.extend(buffer.initialization_status.read().create_action(
             buffer,
-            bb.offset..bb.offset + bind_size,
+            bb.offset..bb.offset + visible_size,
             MemoryInitKind::NeedsInitializedMemory,
         ));
 
diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs
index 00930024ab..b2b710941f 100644
--- a/wgpu-hal/src/dx12/adapter.rs
+++ b/wgpu-hal/src/dx12/adapter.rs
@@ -524,6 +524,9 @@ impl super::Adapter {
                         Direct3D12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT as u64,
                     )
                     .unwrap(),
+                    // Direct3D correctly bounds-checks all array accesses:
+                    // https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#18.6.8.2%20Device%20Memory%20Reads
+                    uniform_bounds_check_alignment: wgt::BufferSize::new(1).unwrap(),
                 },
                 downlevel,
             },
diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs
index e7ecacebe0..de3c0e903e 100644
--- a/wgpu-hal/src/gles/adapter.rs
+++ b/wgpu-hal/src/gles/adapter.rs
@@ -841,6 +841,16 @@ impl super::Adapter {
                 alignments: crate::Alignments {
                     buffer_copy_offset: wgt::BufferSize::new(4).unwrap(),
                     buffer_copy_pitch: wgt::BufferSize::new(4).unwrap(),
+                    // #6151: `wgpu_hal::gles` doesn't ask Naga to inject bounds
+                    // checks in GLSL, and it doesn't request extensions like
+                    // `KHR_robust_buffer_access_behavior` that would provide
+                    // them, so we can't really implement the checks promised by
+                    // [`crate::BufferBinding`].
+                    //
+                    // Since this is a pre-existing condition, for the time
+                    // being, provide 1 as the value here, to cause as little
+                    // trouble as possible.
+                    uniform_bounds_check_alignment: wgt::BufferSize::new(1).unwrap(),
                 },
             },
         })
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 92afa71d43..04638d6b4c 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -1635,9 +1635,25 @@ pub struct InstanceDescriptor<'a> {
 pub struct Alignments {
     /// The alignment of the start of the buffer used as a GPU copy source.
     pub buffer_copy_offset: wgt::BufferSize,
+
     /// The alignment of the row pitch of the texture data stored in a buffer that is
     /// used in a GPU copy operation.
     pub buffer_copy_pitch: wgt::BufferSize,
+
+    /// The finest alignment of bound range checking for uniform buffers.
+    ///
+    /// As explained in the documentation for [`BufferBinding`], when `wgpu_hal`
+    /// restricts shader references to the accessible region of a [`Uniform`]
+    /// buffer, the size of the accessible region is the bind group binding's
+    /// stated [size], rounded up to the next multiple of this value.
+    ///
+    /// We don't need an analogous field for storage buffers, because Vulkan and
+    /// all other platforms promise at most four-byte alignment, so `wgpu_hal`
+    /// always rounds up the size by that much.
+    ///
+    /// [`Uniform`]: wgt::BufferBindingType::Uniform
+    /// [size]: BufferBinding::size
+    pub uniform_bounds_check_alignment: wgt::BufferSize,
 }
 
 #[derive(Clone, Debug)]
@@ -1807,6 +1823,33 @@ pub struct PipelineLayoutDescriptor<'a, B: DynBindGroupLayout + ?Sized> {
     pub push_constant_ranges: &'a [wgt::PushConstantRange],
 }
 
+/// A region of a buffer made visible to shaders via a [`BindGroup`].
+///
+/// `wgpu_hal` guarantees that shaders compiled with
+/// [`ShaderModuleDescriptor::runtime_checks`] set to `true` cannot read or
+/// write data via this binding outside the *accessible region* of [`buffer`]:
+///
+/// - The accessible region starts at [`offset`].
+///
+/// - For [`Storage`] bindings, the size of the accessible region is [`size`],
+///   which must be a multiple of 4.
+///
+/// - For [`Uniform`] bindings, the size of the accessible region is [`size`]
+///   rounded up to the next multiple of
+///   [`Alignments::uniform_bounds_check_alignment`].
+///
+/// Note that this guarantee is stricter than WGSL's requirements for
+/// [out-of-bounds accesses][woob], as WGSL allows them to return values from
+/// elsewhere in the buffer. But this guarantee is necessary anyway, to
+/// permit `wgpu-core` to use lazy buffer initialization.
+///
+/// [`BindGroup`]: Api::BindGroup
+/// [`buffer`]: BufferBinding::buffer
+/// [`offset`]: BufferBinding::offset
+/// [`size`]: BufferBinding::size
+/// [`Storage`]: wgt::BufferBindingType::Storage
+/// [`Uniform`]: wgt::BufferBindingType::Uniform
+/// [woob]: https://gpuweb.github.io/gpuweb/wgsl/#out-of-bounds-access-sec
 #[derive(Debug)]
 pub struct BufferBinding<'a, B: DynBuffer + ?Sized> {
     /// The buffer being bound.
@@ -1925,6 +1968,24 @@ pub enum ShaderInput<'a> {
 
 pub struct ShaderModuleDescriptor<'a> {
     pub label: Label<'a>,
+
+    /// Enforce bounds checks in shaders, even if the underlying driver doesn't
+    /// support doing so natively.
+    ///
+    /// When this is `true`, `wgpu_hal` promises that shaders can only read or
+    /// write the *accessible region* of a bindgroup's buffer bindings, as
+    /// explained in the documentation for [`BufferBinding`]. If the underlying
+    /// graphics platform cannot implement these bounds checks itself,
+    /// `wgpu_hal` will inject bounds checks before presenting the shader to the
+    /// platform.
+    ///
+    /// When this is `false`, `wgpu_hal` only enforces such bounds checks if the
+    /// underlying platform provides a way to do so itself.
+    ///
+    /// Note that `wgpu_hal` users may try to initialize only those portions of
+    /// buffers that they anticipate might be read from. Passing `false` here
+    /// may allow shaders to see wider regions of the buffers than expected,
+    /// making such deferred initialization visible to the application.
     pub runtime_checks: bool,
 }
 
diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs
index 5ef6d358b8..47642079a0 100644
--- a/wgpu-hal/src/metal/adapter.rs
+++ b/wgpu-hal/src/metal/adapter.rs
@@ -997,6 +997,10 @@ impl super::PrivateCapabilities {
             alignments: crate::Alignments {
                 buffer_copy_offset: wgt::BufferSize::new(self.buffer_alignment).unwrap(),
                 buffer_copy_pitch: wgt::BufferSize::new(4).unwrap(),
+                // This backend has Naga incorporate bounds checks into the
+                // Metal Shading Language it generates, so from `wgpu_hal`'s
+                // users' point of view, references are tightly checked.
+                uniform_bounds_check_alignment: wgt::BufferSize::new(1),
             },
             downlevel,
         }
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 25d0d75049..18a5d4aeb9 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -342,9 +342,6 @@ impl PhysicalDeviceFeatures {
                 None
             },
             robustness2: if enabled_extensions.contains(&ext::robustness2::NAME) {
-                // Note: enabling `robust_buffer_access2` isn't requires, strictly speaking
-                // since we can enable `robust_buffer_access` all the time. But it improves
-                // program portability, so we opt into it if they are supported.
                 Some(
                     vk::PhysicalDeviceRobustness2FeaturesEXT::default()
                         .robust_buffer_access2(private_caps.robust_buffer_access2)
@@ -842,6 +839,10 @@ pub struct PhysicalDeviceProperties {
     /// `VK_EXT_subgroup_size_control` extension, promoted to Vulkan 1.3.
     subgroup_size_control: Option<vk::PhysicalDeviceSubgroupSizeControlProperties<'static>>,
 
+    /// Additional `vk::PhysicalDevice` properties from the
+    /// `VK_EXT_robustness2` extension.
+    robustness2: Option<vk::PhysicalDeviceRobustness2PropertiesEXT<'static>>,
+
     /// The device API version.
     ///
     /// Which is the version of Vulkan supported for device-level functionality.
@@ -1104,6 +1105,15 @@ impl PhysicalDeviceProperties {
                 .unwrap(),
             buffer_copy_pitch: wgt::BufferSize::new(limits.optimal_buffer_copy_row_pitch_alignment)
                 .unwrap(),
+            uniform_bounds_check_alignment: {
+                let alignment = if let Some(ref robustness2) = self.robustness2 {
+                    robustness2.robust_uniform_buffer_access_size_alignment
+                } else {
+                    // Naga-injected bounds checks are precise.
+                    1
+                };
+                wgt::BufferSize::new(alignment).unwrap()
+            },
         }
     }
 }
@@ -1133,6 +1143,7 @@ impl super::InstanceShared {
                 let supports_subgroup_size_control = capabilities.device_api_version
                     >= vk::API_VERSION_1_3
                     || capabilities.supports_extension(ext::subgroup_size_control::NAME);
+                let supports_robustness2 = capabilities.supports_extension(ext::robustness2::NAME);
 
                 let supports_acceleration_structure =
                     capabilities.supports_extension(khr::acceleration_structure::NAME);
@@ -1180,6 +1191,13 @@ impl super::InstanceShared {
                     properties2 = properties2.push_next(next);
                 }
 
+                if supports_robustness2 {
+                    let next = capabilities
+                        .robustness2
+                        .insert(vk::PhysicalDeviceRobustness2PropertiesEXT::default());
+                    properties2 = properties2.push_next(next);
+                }
+
                 unsafe {
                     get_device_properties.get_physical_device_properties2(phd, &mut properties2)
                 };
@@ -1191,6 +1209,7 @@ impl super::InstanceShared {
                     capabilities
                         .supported_extensions
                         .retain(|&x| x.extension_name_as_c_str() != Ok(ext::robustness2::NAME));
+                    capabilities.robustness2 = None;
                 }
             };
             capabilities
@@ -1779,7 +1798,7 @@ impl super::Adapter {
                 capabilities: Some(capabilities.iter().cloned().collect()),
                 bounds_check_policies: naga::proc::BoundsCheckPolicies {
                     index: naga::proc::BoundsCheckPolicy::Restrict,
-                    buffer: if self.private_caps.robust_buffer_access {
+                    buffer: if self.private_caps.robust_buffer_access2 {
                         naga::proc::BoundsCheckPolicy::Unchecked
                     } else {
                         naga::proc::BoundsCheckPolicy::Restrict
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index 0ee90c4590..89537300c5 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -493,9 +493,33 @@ struct PrivateCapabilities {
     /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
     can_present: bool,
     non_coherent_map_mask: wgt::BufferAddress,
+
+    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
+    ///
+    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
+    /// `wgpu_hal`'s guarantees. As explained in the documentation for
+    /// [`BufferBinding`], shaders must not be able to access buffer contents
+    /// via a given bindgroup binding beyond that binding's *accessible region*.
+    /// Enabling `robustBufferAccess` does ensure that out-of-bounds reads and
+    /// writes are not undefined behavior (that's good), but still permits
+    /// out-of-bounds reads to return data from anywhere within the buffer, not
+    /// just the accessible region.
+    ///
+    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
+    /// [`BufferBinding`]: crate::BufferBinding
     robust_buffer_access: bool,
+
     robust_image_access: bool,
+
+    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
+    /// `robustBufferAccess2` feature.
+    ///
+    /// This is sufficient to implement `wgpu_hal`'s required bounds-checking of
+    /// shader accesses to buffer contents. If this feature is not available,
+    /// this backend must have Naga inject bounds checks in the generated
+    /// SPIR-V.
     robust_buffer_access2: bool,
+
     robust_image_access2: bool,
     zero_initialize_workgroup_memory: bool,
     image_format_list: bool,
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index cd2f5dcd0a..1983dc3181 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -7309,8 +7309,14 @@ impl ShaderBoundChecks {
     /// Creates a new configuration where the shader isn't bound checked.
     ///
     /// # Safety
-    /// The caller MUST ensure that all shaders built with this configuration don't perform any
-    /// out of bounds reads or writes.
+    ///
+    /// The caller MUST ensure that all shaders built with this configuration
+    /// don't perform any out of bounds reads or writes. `wgpu_core`, in
+    /// particular, initializes only those portions of buffers that it expects
+    /// might be read, and it does not expect contents outside the ranges bound
+    /// in bindgroups to be accessible, so using this configuration with
+    /// ill-behaved shaders could expose uninitialized GPU memory contents to
+    /// the application.
     #[must_use]
     pub unsafe fn unchecked() -> Self {
         ShaderBoundChecks {