From 2cd0bd757594083e5ebb78fed22078e51c9bcc8b Mon Sep 17 00:00:00 2001 From: robtfm <50659922+robtfm@users.noreply.github.com> Date: Fri, 18 Nov 2022 22:04:23 +0000 Subject: [PATCH] improve compile time by type-erasing wgpu structs (#5950) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Objective structs containing wgpu types take a long time to compile. this is particularly bad for generics containing the wgpu structs (like the depth pipeline builder with `#[derive(SystemParam)]` i've been working on). we can avoid that by boxing and type-erasing in the bevy `render_resource` wrappers. type system magic is not a strength of mine so i guess there will be a cleaner way to achieve this, happy to take feedback or for it to be taken as a proof of concept if someone else wants to do a better job. ## Solution - add macros to box and type-erase in debug mode - leave current impl for release mode timings: current |   |   |   -- | -- | -- | --   | Total time: | 64.9s |     | bevy_pbr v0.9.0-dev | 19.2s |     | bevy_render v0.9.0-dev | 17.0s |     | bevy_sprite v0.9.0-dev | 15.1s |     | DepthPipelineBuilder | 18.7s |     |   |   |   with type-erasing |   |   | diff   | Total time: | 49.0s | -24%   | bevy_render v0.9.0-dev | 12.0s | -38%   | bevy_pbr v0.9.0-dev | 8.7s | -49%   | bevy_sprite v0.9.0-dev | 6.1s | -60%   | DepthPipelineBuilder | 1.2s | -94% the depth pipeline builder is a binary with body: ```rust use std::{marker::PhantomData, hash::Hash}; use bevy::{prelude::*, ecs::system::SystemParam, pbr::{RenderMaterials, MaterialPipeline, ShadowPipeline}, render::{renderer::RenderDevice, render_resource::{SpecializedMeshPipelines, PipelineCache}, render_asset::RenderAssets}}; fn main() { println!("Hello, world p!\n"); } #[derive(SystemParam)] pub struct DepthPipelineBuilder<'w, 's, M: Material> where M::Data: Eq + Hash + Clone, { render_device: Res<'w, RenderDevice>, material_pipeline: Res<'w, MaterialPipeline>, material_pipelines: ResMut<'w, SpecializedMeshPipelines>>, shadow_pipeline: Res<'w, ShadowPipeline>, pipeline_cache: ResMut<'w, PipelineCache>, render_meshes: Res<'w, RenderAssets>, render_materials: Res<'w, RenderMaterials>, msaa: Res<'w, Msaa>, #[system_param(ignore)] _p: PhantomData<&'s M>, } ``` --- .../src/render_resource/bind_group.rs | 9 +- .../src/render_resource/bind_group_layout.rs | 9 +- .../bevy_render/src/render_resource/buffer.rs | 13 +- crates/bevy_render/src/render_resource/mod.rs | 1 + .../src/render_resource/pipeline.rs | 16 ++- .../src/render_resource/pipeline_cache.rs | 28 ++-- .../src/render_resource/resource_macros.rs | 123 ++++++++++++++++++ .../src/render_resource/texture.rs | 33 +++-- crates/bevy_render/src/renderer/mod.rs | 2 - .../bevy_render/src/renderer/render_device.rs | 15 ++- 10 files changed, 202 insertions(+), 47 deletions(-) create mode 100644 crates/bevy_render/src/render_resource/resource_macros.rs diff --git a/crates/bevy_render/src/render_resource/bind_group.rs b/crates/bevy_render/src/render_resource/bind_group.rs index 8362a5c899ad5..a8d4a06e17f13 100644 --- a/crates/bevy_render/src/render_resource/bind_group.rs +++ b/crates/bevy_render/src/render_resource/bind_group.rs @@ -9,9 +9,12 @@ use crate::{ texture::FallbackImage, }; use bevy_reflect::Uuid; -use std::{ops::Deref, sync::Arc}; +use std::ops::Deref; use wgpu::BindingResource; +use crate::render_resource::resource_macros::*; +render_resource_wrapper!(ErasedBindGroup, wgpu::BindGroup); + /// A [`BindGroup`] identifier. #[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)] pub struct BindGroupId(Uuid); @@ -25,7 +28,7 @@ pub struct BindGroupId(Uuid); #[derive(Clone, Debug)] pub struct BindGroup { id: BindGroupId, - value: Arc, + value: ErasedBindGroup, } impl BindGroup { @@ -40,7 +43,7 @@ impl From for BindGroup { fn from(value: wgpu::BindGroup) -> Self { BindGroup { id: BindGroupId(Uuid::new_v4()), - value: Arc::new(value), + value: ErasedBindGroup::new(value), } } } diff --git a/crates/bevy_render/src/render_resource/bind_group_layout.rs b/crates/bevy_render/src/render_resource/bind_group_layout.rs index de0ce253f8a5f..47edbbd113580 100644 --- a/crates/bevy_render/src/render_resource/bind_group_layout.rs +++ b/crates/bevy_render/src/render_resource/bind_group_layout.rs @@ -1,13 +1,16 @@ +use crate::render_resource::resource_macros::*; use bevy_reflect::Uuid; -use std::{ops::Deref, sync::Arc}; +use std::ops::Deref; #[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)] pub struct BindGroupLayoutId(Uuid); +render_resource_wrapper!(ErasedBindGroupLayout, wgpu::BindGroupLayout); + #[derive(Clone, Debug)] pub struct BindGroupLayout { id: BindGroupLayoutId, - value: Arc, + value: ErasedBindGroupLayout, } impl PartialEq for BindGroupLayout { @@ -32,7 +35,7 @@ impl From for BindGroupLayout { fn from(value: wgpu::BindGroupLayout) -> Self { BindGroupLayout { id: BindGroupLayoutId(Uuid::new_v4()), - value: Arc::new(value), + value: ErasedBindGroupLayout::new(value), } } } diff --git a/crates/bevy_render/src/render_resource/buffer.rs b/crates/bevy_render/src/render_resource/buffer.rs index 084a3290c26f1..6bae2e529ab41 100644 --- a/crates/bevy_render/src/render_resource/buffer.rs +++ b/crates/bevy_render/src/render_resource/buffer.rs @@ -1,16 +1,17 @@ use bevy_utils::Uuid; -use std::{ - ops::{Bound, Deref, RangeBounds}, - sync::Arc, -}; +use std::ops::{Bound, Deref, RangeBounds}; + +use crate::render_resource::resource_macros::*; #[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)] pub struct BufferId(Uuid); +render_resource_wrapper!(ErasedBuffer, wgpu::Buffer); + #[derive(Clone, Debug)] pub struct Buffer { id: BufferId, - value: Arc, + value: ErasedBuffer, } impl Buffer { @@ -42,7 +43,7 @@ impl From for Buffer { fn from(value: wgpu::Buffer) -> Self { Buffer { id: BufferId(Uuid::new_v4()), - value: Arc::new(value), + value: ErasedBuffer::new(value), } } } diff --git a/crates/bevy_render/src/render_resource/mod.rs b/crates/bevy_render/src/render_resource/mod.rs index a0206b1c7f233..33a010ff502d1 100644 --- a/crates/bevy_render/src/render_resource/mod.rs +++ b/crates/bevy_render/src/render_resource/mod.rs @@ -5,6 +5,7 @@ mod buffer_vec; mod pipeline; mod pipeline_cache; mod pipeline_specializer; +pub mod resource_macros; mod shader; mod storage_buffer; mod texture; diff --git a/crates/bevy_render/src/render_resource/pipeline.rs b/crates/bevy_render/src/render_resource/pipeline.rs index 7cf6ded22c2d4..b63febfd53ce7 100644 --- a/crates/bevy_render/src/render_resource/pipeline.rs +++ b/crates/bevy_render/src/render_resource/pipeline.rs @@ -1,16 +1,20 @@ use crate::render_resource::{BindGroupLayout, Shader}; use bevy_asset::Handle; use bevy_reflect::Uuid; -use std::{borrow::Cow, ops::Deref, sync::Arc}; +use std::{borrow::Cow, ops::Deref}; use wgpu::{ BufferAddress, ColorTargetState, DepthStencilState, MultisampleState, PrimitiveState, VertexAttribute, VertexFormat, VertexStepMode, }; +use crate::render_resource::resource_macros::*; + /// A [`RenderPipeline`] identifier. #[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)] pub struct RenderPipelineId(Uuid); +render_resource_wrapper!(ErasedRenderPipeline, wgpu::RenderPipeline); + /// A [`RenderPipeline`] represents a graphics pipeline and its stages (shaders), bindings and vertex buffers. /// /// May be converted from and dereferences to a wgpu [`RenderPipeline`](wgpu::RenderPipeline). @@ -18,7 +22,7 @@ pub struct RenderPipelineId(Uuid); #[derive(Clone, Debug)] pub struct RenderPipeline { id: RenderPipelineId, - value: Arc, + value: ErasedRenderPipeline, } impl RenderPipeline { @@ -32,7 +36,7 @@ impl From for RenderPipeline { fn from(value: wgpu::RenderPipeline) -> Self { RenderPipeline { id: RenderPipelineId(Uuid::new_v4()), - value: Arc::new(value), + value: ErasedRenderPipeline::new(value), } } } @@ -50,6 +54,8 @@ impl Deref for RenderPipeline { #[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)] pub struct ComputePipelineId(Uuid); +render_resource_wrapper!(ErasedComputePipeline, wgpu::ComputePipeline); + /// A [`ComputePipeline`] represents a compute pipeline and its single shader stage. /// /// May be converted from and dereferences to a wgpu [`ComputePipeline`](wgpu::ComputePipeline). @@ -57,7 +63,7 @@ pub struct ComputePipelineId(Uuid); #[derive(Clone, Debug)] pub struct ComputePipeline { id: ComputePipelineId, - value: Arc, + value: ErasedComputePipeline, } impl ComputePipeline { @@ -72,7 +78,7 @@ impl From for ComputePipeline { fn from(value: wgpu::ComputePipeline) -> Self { ComputePipeline { id: ComputePipelineId(Uuid::new_v4()), - value: Arc::new(value), + value: ErasedComputePipeline::new(value), } } } diff --git a/crates/bevy_render/src/render_resource/pipeline_cache.rs b/crates/bevy_render/src/render_resource/pipeline_cache.rs index e03857e788a8e..659844d93ed39 100644 --- a/crates/bevy_render/src/render_resource/pipeline_cache.rs +++ b/crates/bevy_render/src/render_resource/pipeline_cache.rs @@ -17,13 +17,17 @@ use bevy_utils::{ tracing::{debug, error}, Entry, HashMap, HashSet, }; -use std::{hash::Hash, iter::FusedIterator, mem, ops::Deref, sync::Arc}; +use std::{hash::Hash, iter::FusedIterator, mem, ops::Deref}; use thiserror::Error; use wgpu::{ - BufferBindingType, PipelineLayoutDescriptor, ShaderModule, - VertexBufferLayout as RawVertexBufferLayout, + BufferBindingType, PipelineLayoutDescriptor, VertexBufferLayout as RawVertexBufferLayout, }; +use crate::render_resource::resource_macros::*; + +render_resource_wrapper!(ErasedShaderModule, wgpu::ShaderModule); +render_resource_wrapper!(ErasedPipelineLayout, wgpu::PipelineLayout); + /// A descriptor for a [`Pipeline`]. /// /// Used to store an heterogenous collection of render and compute pipeline descriptors together. @@ -103,7 +107,7 @@ impl CachedPipelineState { #[derive(Default)] struct ShaderData { pipelines: HashSet, - processed_shaders: HashMap, Arc>, + processed_shaders: HashMap, ErasedShaderModule>, resolved_imports: HashMap>, dependents: HashSet>, } @@ -124,7 +128,7 @@ impl ShaderCache { pipeline: CachedPipelineId, handle: &Handle, shader_defs: &[String], - ) -> Result, PipelineCacheError> { + ) -> Result { let shader = self .shaders .get(handle) @@ -204,7 +208,7 @@ impl ShaderCache { return Err(PipelineCacheError::CreateShaderModule(description)); } - entry.insert(Arc::new(shader_module)) + entry.insert(ErasedShaderModule::new(shader_module)) } }; @@ -276,7 +280,7 @@ impl ShaderCache { #[derive(Default)] struct LayoutCache { - layouts: HashMap, wgpu::PipelineLayout>, + layouts: HashMap, ErasedPipelineLayout>, } impl LayoutCache { @@ -291,10 +295,12 @@ impl LayoutCache { .iter() .map(|l| l.value()) .collect::>(); - render_device.create_pipeline_layout(&PipelineLayoutDescriptor { - bind_group_layouts: &bind_group_layouts, - ..default() - }) + ErasedPipelineLayout::new(render_device.create_pipeline_layout( + &PipelineLayoutDescriptor { + bind_group_layouts: &bind_group_layouts, + ..default() + }, + )) }) } } diff --git a/crates/bevy_render/src/render_resource/resource_macros.rs b/crates/bevy_render/src/render_resource/resource_macros.rs new file mode 100644 index 0000000000000..4f1a69673b043 --- /dev/null +++ b/crates/bevy_render/src/render_resource/resource_macros.rs @@ -0,0 +1,123 @@ +// structs containing wgpu types take a long time to compile. this is particularly bad for generic +// structs containing wgpu structs. we avoid that in debug builds (and for cargo check and rust analyzer) +// by boxing and type-erasing with the `render_resource_wrapper` macro. +// analysis from https://github.com/bevyengine/bevy/pull/5950#issuecomment-1243473071 indicates this is +// due to `evaluate_obligations`. we should check if this can be removed after a fix lands for +// https://github.com/rust-lang/rust/issues/99188 (and after other `evaluate_obligations`-related changes). +#[cfg(debug_assertions)] +#[macro_export] +macro_rules! render_resource_wrapper { + ($wrapper_type:ident, $wgpu_type:ty) => { + #[derive(Clone, Debug)] + pub struct $wrapper_type(Option>>); + + impl $wrapper_type { + pub fn new(value: $wgpu_type) -> Self { + unsafe { + Self(Some(std::sync::Arc::new(std::mem::transmute(Box::new( + value, + ))))) + } + } + + pub fn try_unwrap(mut self) -> Option<$wgpu_type> { + let inner = self.0.take(); + if let Some(inner) = inner { + match std::sync::Arc::try_unwrap(inner) { + Ok(untyped_box) => { + let typed_box = unsafe { + std::mem::transmute::, Box<$wgpu_type>>(untyped_box) + }; + Some(*typed_box) + } + Err(inner) => { + let _ = unsafe { + std::mem::transmute::< + std::sync::Arc>, + std::sync::Arc>, + >(inner) + }; + None + } + } + } else { + None + } + } + } + + impl std::ops::Deref for $wrapper_type { + type Target = $wgpu_type; + + fn deref(&self) -> &Self::Target { + let untyped_box = self + .0 + .as_ref() + .expect("render_resource_wrapper inner value has already been taken (via drop or try_unwrap") + .as_ref(); + + let typed_box = + unsafe { std::mem::transmute::<&Box<()>, &Box<$wgpu_type>>(untyped_box) }; + typed_box.as_ref() + } + } + + impl Drop for $wrapper_type { + fn drop(&mut self) { + let inner = self.0.take(); + if let Some(inner) = inner { + let _ = unsafe { + std::mem::transmute::< + std::sync::Arc>, + std::sync::Arc>, + >(inner) + }; + } + } + } + + // Arc> and Arc<()> will be Sync and Send even when $wgpu_type is not Sync or Send. + // We ensure correctness by checking that $wgpu_type does implement Send and Sync. + // If in future there is a case where a wrapper is required for a non-send/sync type + // we can implement a macro variant that also does `impl !Send for $wrapper_type {}` and + // `impl !Sync for $wrapper_type {}` + const _: () = { + trait AssertSendSyncBound: Send + Sync {} + impl AssertSendSyncBound for $wgpu_type {} + }; + }; +} + +#[cfg(not(debug_assertions))] +#[macro_export] +macro_rules! render_resource_wrapper { + ($wrapper_type:ident, $wgpu_type:ty) => { + #[derive(Clone, Debug)] + pub struct $wrapper_type(std::sync::Arc<$wgpu_type>); + + impl $wrapper_type { + pub fn new(value: $wgpu_type) -> Self { + Self(std::sync::Arc::new(value)) + } + + pub fn try_unwrap(self) -> Option<$wgpu_type> { + std::sync::Arc::try_unwrap(self.0).ok() + } + } + + impl std::ops::Deref for $wrapper_type { + type Target = $wgpu_type; + + fn deref(&self) -> &Self::Target { + self.0.as_ref() + } + } + + const _: () = { + trait AssertSendSyncBound: Send + Sync {} + impl AssertSendSyncBound for $wgpu_type {} + }; + }; +} + +pub use render_resource_wrapper; diff --git a/crates/bevy_render/src/render_resource/texture.rs b/crates/bevy_render/src/render_resource/texture.rs index 4a7d582d7559e..330d7cdb0b85e 100644 --- a/crates/bevy_render/src/render_resource/texture.rs +++ b/crates/bevy_render/src/render_resource/texture.rs @@ -1,10 +1,14 @@ use bevy_utils::Uuid; -use std::{ops::Deref, sync::Arc}; +use std::ops::Deref; + +use crate::render_resource::resource_macros::*; /// A [`Texture`] identifier. #[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)] pub struct TextureId(Uuid); +render_resource_wrapper!(ErasedTexture, wgpu::Texture); + /// A GPU-accessible texture. /// /// May be converted from and dereferences to a wgpu [`Texture`](wgpu::Texture). @@ -12,7 +16,7 @@ pub struct TextureId(Uuid); #[derive(Clone, Debug)] pub struct Texture { id: TextureId, - value: Arc, + value: ErasedTexture, } impl Texture { @@ -32,7 +36,7 @@ impl From for Texture { fn from(value: wgpu::Texture) -> Self { Texture { id: TextureId(Uuid::new_v4()), - value: Arc::new(value), + value: ErasedTexture::new(value), } } } @@ -50,20 +54,23 @@ impl Deref for Texture { #[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)] pub struct TextureViewId(Uuid); +render_resource_wrapper!(ErasedTextureView, wgpu::TextureView); +render_resource_wrapper!(ErasedSurfaceTexture, wgpu::SurfaceTexture); + /// This type combines wgpu's [`TextureView`](wgpu::TextureView) and /// [`SurfaceTexture`](wgpu::SurfaceTexture) into the same interface. #[derive(Clone, Debug)] pub enum TextureViewValue { /// The value is an actual wgpu [`TextureView`](wgpu::TextureView). - TextureView(Arc), + TextureView(ErasedTextureView), /// The value is a wgpu [`SurfaceTexture`](wgpu::SurfaceTexture), but dereferences to /// a [`TextureView`](wgpu::TextureView). SurfaceTexture { // NOTE: The order of these fields is important because the view must be dropped before the // frame is dropped - view: Arc, - texture: Arc, + view: ErasedTextureView, + texture: ErasedSurfaceTexture, }, } @@ -89,7 +96,7 @@ impl TextureView { pub fn take_surface_texture(self) -> Option { match self.value { TextureViewValue::TextureView(_) => None, - TextureViewValue::SurfaceTexture { texture, .. } => Arc::try_unwrap(texture).ok(), + TextureViewValue::SurfaceTexture { texture, .. } => texture.try_unwrap(), } } } @@ -98,15 +105,15 @@ impl From for TextureView { fn from(value: wgpu::TextureView) -> Self { TextureView { id: TextureViewId(Uuid::new_v4()), - value: TextureViewValue::TextureView(Arc::new(value)), + value: TextureViewValue::TextureView(ErasedTextureView::new(value)), } } } impl From for TextureView { fn from(value: wgpu::SurfaceTexture) -> Self { - let texture = Arc::new(value); - let view = Arc::new(texture.texture.create_view(&Default::default())); + let view = ErasedTextureView::new(value.texture.create_view(&Default::default())); + let texture = ErasedSurfaceTexture::new(value); TextureView { id: TextureViewId(Uuid::new_v4()), @@ -131,6 +138,8 @@ impl Deref for TextureView { #[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)] pub struct SamplerId(Uuid); +render_resource_wrapper!(ErasedSampler, wgpu::Sampler); + /// A Sampler defines how a pipeline will sample from a [`TextureView`]. /// They define image filters (including anisotropy) and address (wrapping) modes, among other things. /// @@ -139,7 +148,7 @@ pub struct SamplerId(Uuid); #[derive(Clone, Debug)] pub struct Sampler { id: SamplerId, - value: Arc, + value: ErasedSampler, } impl Sampler { @@ -154,7 +163,7 @@ impl From for Sampler { fn from(value: wgpu::Sampler) -> Self { Sampler { id: SamplerId(Uuid::new_v4()), - value: Arc::new(value), + value: ErasedSampler::new(value), } } } diff --git a/crates/bevy_render/src/renderer/mod.rs b/crates/bevy_render/src/renderer/mod.rs index 3d74ecce4c2b2..6f697ca7d0594 100644 --- a/crates/bevy_render/src/renderer/mod.rs +++ b/crates/bevy_render/src/renderer/mod.rs @@ -261,8 +261,6 @@ pub async fn initialize_renderer( ) .await .unwrap(); - - let device = Arc::new(device); let queue = Arc::new(queue); let adapter = Arc::new(adapter); ( diff --git a/crates/bevy_render/src/renderer/render_device.rs b/crates/bevy_render/src/renderer/render_device.rs index 0ac5acbc6fa84..eef8b100cb130 100644 --- a/crates/bevy_render/src/renderer/render_device.rs +++ b/crates/bevy_render/src/renderer/render_device.rs @@ -3,20 +3,25 @@ use crate::render_resource::{ RenderPipeline, Sampler, Texture, }; use bevy_ecs::system::Resource; -use std::sync::Arc; use wgpu::{util::DeviceExt, BufferAsyncError, BufferBindingType}; use super::RenderQueue; +use crate::render_resource::resource_macros::*; + +render_resource_wrapper!(ErasedRenderDevice, wgpu::Device); + /// This GPU device is responsible for the creation of most rendering and compute resources. #[derive(Resource, Clone)] pub struct RenderDevice { - device: Arc, + device: ErasedRenderDevice, } -impl From> for RenderDevice { - fn from(device: Arc) -> Self { - Self { device } +impl From for RenderDevice { + fn from(device: wgpu::Device) -> Self { + Self { + device: ErasedRenderDevice::new(device), + } } }