From 7a52266194092e864cf42b9951c94738fa291e5d Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 27 Feb 2024 13:42:35 +0000 Subject: [PATCH 01/31] Implement resource management of pipeline caches --- player/src/lib.rs | 6 +++ wgpu-core/src/device/global.rs | 45 +++++++++++++++++ wgpu-core/src/device/resource.rs | 17 +++++++ wgpu-core/src/device/trace.rs | 5 ++ wgpu-core/src/hub.rs | 7 ++- wgpu-core/src/id.rs | 1 + wgpu-core/src/pipeline.rs | 46 +++++++++++++++++ wgpu-hal/src/empty.rs | 8 +++ wgpu-hal/src/gles/device.rs | 5 ++ wgpu-hal/src/gles/mod.rs | 1 + wgpu-hal/src/lib.rs | 11 +++++ wgpu-hal/src/vulkan/device.rs | 22 ++++++++- wgpu-hal/src/vulkan/mod.rs | 6 +++ wgpu-types/src/lib.rs | 19 +++++++ wgpu/src/backend/webgpu.rs | 11 +++++ wgpu/src/backend/wgpu_core.rs | 64 ++++++++++++++++++++++-- wgpu/src/context.rs | 76 ++++++++++++++++++++++++++-- wgpu/src/lib.rs | 85 ++++++++++++++++++++++++++++++++ 18 files changed, 424 insertions(+), 11 deletions(-) diff --git a/player/src/lib.rs b/player/src/lib.rs index c67c605e58..930fef151a 100644 --- a/player/src/lib.rs +++ b/player/src/lib.rs @@ -302,6 +302,12 @@ impl GlobalPlay for wgc::global::Global { Action::DestroyRenderPipeline(id) => { self.render_pipeline_drop::(id); } + Action::CreatePipelineCache { id, desc } => { + let _ = unsafe { self.device_create_pipeline_cache::(device, &desc, Some(id)) }; + } + Action::DestroyPipelineCache(id) => { + self.pipeline_cache_drop::(id); + } Action::CreateRenderBundle { id, desc, base } => { let bundle = wgc::command::RenderBundleEncoder::new(&desc, device, Some(base)).unwrap(); diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index be524840b8..83777edf20 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -1825,6 +1825,51 @@ impl Global { } } + pub unsafe fn device_create_pipeline_cache( + &self, + device_id: DeviceId, + desc: &pipeline::PipelineCacheDescriptor<'_>, + 
id_in: Option, + ) -> Option { + profiling::scope!("Device::create_pipeline_cache"); + + let hub = A::hub(self); + + let fid = hub.pipeline_caches.prepare(id_in); + let device = match hub.devices.get(device_id) { + Ok(device) => device, + // TODO: Handle error properly + Err(_) => return None, + }; + if !device.is_valid() { + return None; + } + + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + trace.add(trace::Action::CreatePipelineCache { + id: fid.id(), + desc: desc.clone(), + }); + } + let pipeline = unsafe { device.create_pipeline_cache(desc) }?; + let (id, _) = fid.assign(pipeline); + api_log!("Device::create_pipeline_cache -> {id:?}"); + + Some(id) + } + + pub fn pipeline_cache_drop(&self, pipeline_cache_id: id::PipelineCacheId) { + profiling::scope!("PipelineCache::drop"); + api_log!("PipelineCache::drop {pipeline_cache_id:?}"); + + let hub = A::hub(self); + + if let Some(cache) = hub.pipeline_caches.unregister(pipeline_cache_id) { + drop(cache) + } + } + pub fn surface_configure( &self, surface_id: SurfaceId, diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 2541af7c70..81f775f646 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -3489,6 +3489,23 @@ impl Device { Ok(pipeline) } + pub unsafe fn create_pipeline_cache( + self: &Arc, + desc: &pipeline::PipelineCacheDescriptor, + ) -> Option> { + let cache_desc = hal::PipelineCacheDescriptor { + data: desc.data.as_deref(), + label: desc.label.to_hal(self.instance_flags), + }; + let raw = unsafe { (&self.raw.as_ref().unwrap()).create_pipeline_cache(&cache_desc) }; + let cache = pipeline::PipelineCache { + device: self.clone(), + info: ResourceInfo::new(desc.label.borrow_or_default()), + raw, + }; + Some(cache) + } + pub(crate) fn get_texture_format_features( &self, adapter: &Adapter, diff --git a/wgpu-core/src/device/trace.rs b/wgpu-core/src/device/trace.rs index 0802b610d8..24790103a5 100644 --- 
a/wgpu-core/src/device/trace.rs +++ b/wgpu-core/src/device/trace.rs @@ -98,6 +98,11 @@ pub enum Action<'a> { implicit_context: Option, }, DestroyRenderPipeline(id::RenderPipelineId), + CreatePipelineCache { + id: id::PipelineCacheId, + desc: crate::pipeline::PipelineCacheDescriptor<'a>, + }, + DestroyPipelineCache(id::PipelineCacheId), CreateRenderBundle { id: id::RenderBundleId, desc: crate::command::RenderBundleEncoderDescriptor<'a>, diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs index eb57411d98..a318f91fc0 100644 --- a/wgpu-core/src/hub.rs +++ b/wgpu-core/src/hub.rs @@ -110,7 +110,7 @@ use crate::{ device::{queue::Queue, Device}, hal_api::HalApi, instance::{Adapter, Surface}, - pipeline::{ComputePipeline, RenderPipeline, ShaderModule}, + pipeline::{ComputePipeline, PipelineCache, RenderPipeline, ShaderModule}, registry::{Registry, RegistryReport}, resource::{Buffer, QuerySet, Sampler, StagingBuffer, Texture, TextureView}, storage::{Element, Storage}, @@ -130,6 +130,7 @@ pub struct HubReport { pub render_bundles: RegistryReport, pub render_pipelines: RegistryReport, pub compute_pipelines: RegistryReport, + pub pipeline_caches: RegistryReport, pub query_sets: RegistryReport, pub buffers: RegistryReport, pub textures: RegistryReport, @@ -180,6 +181,7 @@ pub struct Hub { pub(crate) render_bundles: Registry>, pub(crate) render_pipelines: Registry>, pub(crate) compute_pipelines: Registry>, + pub(crate) pipeline_caches: Registry>, pub(crate) query_sets: Registry>, pub(crate) buffers: Registry>, pub(crate) staging_buffers: Registry>, @@ -202,6 +204,7 @@ impl Hub { render_bundles: Registry::new(A::VARIANT), render_pipelines: Registry::new(A::VARIANT), compute_pipelines: Registry::new(A::VARIANT), + pipeline_caches: Registry::new(A::VARIANT), query_sets: Registry::new(A::VARIANT), buffers: Registry::new(A::VARIANT), staging_buffers: Registry::new(A::VARIANT), @@ -235,6 +238,7 @@ impl Hub { self.pipeline_layouts.write().map.clear(); 
self.compute_pipelines.write().map.clear(); self.render_pipelines.write().map.clear(); + self.pipeline_caches.write().map.clear(); self.query_sets.write().map.clear(); for element in surface_guard.map.iter() { @@ -280,6 +284,7 @@ impl Hub { render_bundles: self.render_bundles.generate_report(), render_pipelines: self.render_pipelines.generate_report(), compute_pipelines: self.compute_pipelines.generate_report(), + pipeline_caches: self.pipeline_caches.generate_report(), query_sets: self.query_sets.generate_report(), buffers: self.buffers.generate_report(), textures: self.textures.generate_report(), diff --git a/wgpu-core/src/id.rs b/wgpu-core/src/id.rs index 1fa89f2bf0..425ed1e140 100644 --- a/wgpu-core/src/id.rs +++ b/wgpu-core/src/id.rs @@ -313,6 +313,7 @@ ids! { pub type ShaderModuleId ShaderModule; pub type RenderPipelineId RenderPipeline; pub type ComputePipelineId ComputePipeline; + pub type PipelineCacheId PipelineCache; pub type CommandEncoderId CommandEncoder; pub type CommandBufferId CommandBuffer; pub type RenderPassEncoderId RenderPassEncoder; diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index d70b118d7e..9beff60d6a 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -259,6 +259,45 @@ impl ComputePipeline { } } +#[derive(Debug)] +pub struct PipelineCache { + pub(crate) raw: Option, + pub(crate) device: Arc>, + pub(crate) info: ResourceInfo>, +} + +impl Drop for PipelineCache { + fn drop(&mut self) { + if let Some(raw) = self.raw.take() { + resource_log!("Destroy raw PipelineCache {:?}", self.info.label()); + + #[cfg(feature = "trace")] + if let Some(t) = self.device.trace.lock().as_mut() { + t.add(trace::Action::DestroyPipelineCache(self.info.id())); + } + + unsafe { + use hal::Device; + self.device.raw().destroy_pipeline_cache(raw); + } + } + } +} + +impl Resource for PipelineCache { + const TYPE: ResourceType = "PipelineCache"; + + type Marker = crate::id::markers::PipelineCache; + + fn as_info(&self) -> 
&ResourceInfo { + &self.info + } + + fn as_info_mut(&mut self) -> &mut ResourceInfo { + &mut self.info + } +} + /// Describes how the vertex buffer is interpreted. #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -317,6 +356,13 @@ pub struct RenderPipelineDescriptor<'a> { pub multiview: Option, } +#[derive(Clone, Debug)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct PipelineCacheDescriptor<'a> { + pub label: Label<'a>, + pub data: Option>, +} + #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum ColorStateError { diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index ad00da1b7f..4138e6e5ff 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -30,6 +30,7 @@ impl crate::Api for Api { type QuerySet = Resource; type Fence = Resource; type AccelerationStructure = Resource; + type PipelineCache = Resource; type BindGroupLayout = Resource; type BindGroup = Resource; @@ -220,6 +221,13 @@ impl crate::Device for Context { Ok(Resource) } unsafe fn destroy_compute_pipeline(&self, pipeline: Resource) {} + unsafe fn create_pipeline_cache( + &self, + desc: &crate::PipelineCacheDescriptor<'_>, + ) -> Option { + Some(Resource) + } + unsafe fn destroy_pipeline_cache(&self, cache: Resource) {} unsafe fn create_query_set( &self, diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index a1e2736aa6..bed69f3d96 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -1406,6 +1406,11 @@ impl crate::Device for super::Device { } } + unsafe fn create_pipeline_cache(&self, _: &crate::PipelineCacheDescriptor<'_>) -> Option<()> { + None + } + unsafe fn destroy_pipeline_cache(&self, (): ()) {} + #[cfg_attr(target_arch = "wasm32", allow(unused))] unsafe fn create_query_set( &self, diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 0fcb09be46..058bdcf6f3 100644 --- a/wgpu-hal/src/gles/mod.rs +++ 
b/wgpu-hal/src/gles/mod.rs @@ -154,6 +154,7 @@ impl crate::Api for Api { type QuerySet = QuerySet; type Fence = Fence; type AccelerationStructure = (); + type PipelineCache = (); type BindGroupLayout = BindGroupLayout; type BindGroup = BindGroup; diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index d300ca30cc..8f1b214e4d 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -432,6 +432,7 @@ pub trait Api: Clone + fmt::Debug + Sized { type ShaderModule: fmt::Debug + WasmNotSendSync; type RenderPipeline: fmt::Debug + WasmNotSendSync; type ComputePipeline: fmt::Debug + WasmNotSendSync; + type PipelineCache: fmt::Debug + WasmNotSendSync; type AccelerationStructure: fmt::Debug + WasmNotSendSync + 'static; } @@ -611,6 +612,11 @@ pub trait Device: WasmNotSendSync { desc: &ComputePipelineDescriptor, ) -> Result<::ComputePipeline, PipelineError>; unsafe fn destroy_compute_pipeline(&self, pipeline: ::ComputePipeline); + unsafe fn create_pipeline_cache( + &self, + desc: &PipelineCacheDescriptor<'_>, + ) -> Option; + unsafe fn destroy_pipeline_cache(&self, cache: A::PipelineCache); unsafe fn create_query_set( &self, @@ -1638,6 +1644,11 @@ pub struct ComputePipelineDescriptor<'a, A: Api> { pub stage: ProgrammableStage<'a, A>, } +pub struct PipelineCacheDescriptor<'a> { + pub label: Label<'a>, + pub data: Option<&'a [u8]>, +} + /// Describes how the vertex buffer is interpreted. 
#[derive(Clone, Debug)] pub struct VertexBufferLayout<'a> { diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index ec392533a0..b9a98fce6c 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1,4 +1,4 @@ -use super::conv; +use super::{conv, PipelineCache}; use arrayvec::ArrayVec; use ash::{extensions::khr, vk}; @@ -1943,6 +1943,26 @@ impl crate::Device for super::Device { unsafe { self.shared.raw.destroy_pipeline(pipeline.raw, None) }; } + unsafe fn create_pipeline_cache( + &self, + desc: &crate::PipelineCacheDescriptor<'_>, + ) -> Option { + let mut info = vk::PipelineCacheCreateInfo::builder(); + // TODO: Add additional validation to the data, as described in https://medium.com/@zeuxcg/creating-a-robust-pipeline-cache-with-vulkan-961d09416cda + if let Some(data) = desc.data { + info = info.initial_data(data) + } + // TODO: Proper error handling + let raw = { + profiling::scope!("vkCreatePipelineCache"); + unsafe { self.shared.raw.create_pipeline_cache(&info, None) }.ok()? 
+ }; + + Some(PipelineCache { raw }) + } + unsafe fn destroy_pipeline_cache(&self, cache: PipelineCache) { unsafe { self.shared.raw.destroy_pipeline_cache(cache.raw, None) } } unsafe fn create_query_set( &self, desc: &wgt::QuerySetDescriptor, diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index d1ea82772e..53e7dfbf5a 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -73,6 +73,7 @@ impl crate::Api for Api { type QuerySet = QuerySet; type Fence = Fence; type AccelerationStructure = AccelerationStructure; + type PipelineCache = PipelineCache; type BindGroupLayout = BindGroupLayout; type BindGroup = BindGroup; @@ -554,6 +555,11 @@ pub struct ComputePipeline { raw: vk::Pipeline, } +#[derive(Debug)] +pub struct PipelineCache { + raw: vk::PipelineCache, +} + #[derive(Debug)] pub struct QuerySet { raw: vk::QueryPool, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 7049cd3a8d..1c2107e46c 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -1748,6 +1748,25 @@ pub struct AdapterInfo { pub backend: Backend, } +impl AdapterInfo { + /// A recommended filename for storing the pipeline cache of this adapter + /// + /// Each adapter may have a different filename, to allow using multiple caches + pub fn pipeline_cache_key(&self) -> Option { + match self.backend { + Backend::Vulkan => Some(format!( + // The vendor/device should uniquely define a driver + // We will also later validate that the vendor and driver + // version match, which may lead to clearing an outdated + // cache for the same device. + "wgpu_pipeline_cache_vulkan_{}_{}", + self.vendor, self.device + )), + _ => None, + } + } +} + /// Describes a [`Device`](../wgpu/struct.Device.html). 
/// /// Corresponds to [WebGPU `GPUDeviceDescriptor`]( diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index 2185d5b8b8..18f1a0f0f3 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -1159,6 +1159,8 @@ impl crate::context::Context for ContextWebGpu { type SurfaceOutputDetail = SurfaceOutputDetail; type SubmissionIndex = Unused; type SubmissionIndexData = (); + type PipelineCacheId = Unused; + type PipelineCacheData = (); type RequestAdapterFuture = MakeSendFuture< wasm_bindgen_futures::JsFuture, @@ -1995,6 +1997,15 @@ impl crate::context::Context for ContextWebGpu { create_identified(device_data.0.create_compute_pipeline(&mapped_desc)) } + unsafe fn device_create_pipeline_cache_init( + &self, + _: &Self::DeviceId, + _: &Self::DeviceData, + _: &PipelineCacheInitDescriptor<'_>, + ) -> Option<(Self::PipelineCacheId, Self::PipelineCacheData)> { + None + } + fn device_create_buffer( &self, _device: &Self::DeviceId, diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 65a3e39975..2e1f15edc0 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -4,10 +4,10 @@ use crate::{ BufferDescriptor, CommandEncoderDescriptor, CompilationInfo, CompilationMessage, CompilationMessageType, ComputePassDescriptor, ComputePipelineDescriptor, DownlevelCapabilities, Features, Label, Limits, LoadOp, MapMode, Operations, - PipelineLayoutDescriptor, RenderBundleEncoderDescriptor, RenderPipelineDescriptor, - SamplerDescriptor, ShaderModuleDescriptor, ShaderModuleDescriptorSpirV, ShaderSource, StoreOp, - SurfaceStatus, SurfaceTargetUnsafe, TextureDescriptor, TextureViewDescriptor, - UncapturedErrorHandler, + PipelineCacheInitDescriptor, PipelineLayoutDescriptor, RenderBundleEncoderDescriptor, + RenderPipelineDescriptor, SamplerDescriptor, ShaderModuleDescriptor, + ShaderModuleDescriptorSpirV, ShaderSource, StoreOp, SurfaceStatus, SurfaceTargetUnsafe, + TextureDescriptor, TextureViewDescriptor, 
UncapturedErrorHandler, }; use arrayvec::ArrayVec; @@ -511,6 +511,8 @@ impl crate::Context for ContextWgpuCore { type RenderPipelineData = (); type ComputePipelineId = wgc::id::ComputePipelineId; type ComputePipelineData = (); + type PipelineCacheId = wgc::id::PipelineCacheId; + type PipelineCacheData = (); type CommandEncoderId = wgc::id::CommandEncoderId; type CommandEncoderData = CommandEncoder; type ComputePassId = Unused; @@ -1259,6 +1261,52 @@ impl crate::Context for ContextWgpuCore { } (id, ()) } + + unsafe fn device_create_pipeline_cache_init( + &self, + device: &Self::DeviceId, + // TODO: Will be used for error handling + device_data: &Self::DeviceData, + desc: &PipelineCacheInitDescriptor<'_>, + ) -> Option<(Self::PipelineCacheId, Self::PipelineCacheData)> { + use wgc::pipeline as pipe; + + let descriptor = pipe::PipelineCacheDescriptor { + label: desc.label.map(Borrowed), + data: Some(desc.data.into()), + }; + let id = wgc::gfx_select!(device => self.0.device_create_pipeline_cache( + *device, + &descriptor, + None + ))?; + Some((id, ())) + } + + fn device_create_pipeline_cache( + &self, + device: &Self::DeviceId, + // TODO: Will be used for error handling + device_data: &Self::DeviceData, + desc: &crate::PipelineCacheDescriptor<'_>, + ) -> Option<(Self::PipelineCacheId, Self::PipelineCacheData)> { + use wgc::pipeline as pipe; + + let descriptor = pipe::PipelineCacheDescriptor { + label: desc.label.map(Borrowed), + data: None, + }; + // Safety: data is None, so no safety concerns + let id = unsafe { + wgc::gfx_select!(device => self.0.device_create_pipeline_cache( + *device, + &descriptor, + None + )) + }?; + Some((id, ())) + } + fn device_create_buffer( &self, device: &Self::DeviceId, @@ -1718,6 +1766,14 @@ impl crate::Context for ContextWgpuCore { wgc::gfx_select!(*pipeline => self.0.render_pipeline_drop(*pipeline)) } + fn pipeline_cache_drop( + &self, + cache: &Self::PipelineCacheId, + _cache_data: &Self::PipelineCacheData, + ) { + 
wgc::gfx_select!(*cache => self.0.pipeline_cache_drop(*cache)) + } + fn compute_pipeline_get_bind_group_layout( &self, pipeline: &Self::ComputePipelineId, diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index 12ea5cc903..ac975add9f 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -11,11 +11,12 @@ use crate::{ AnyWasmNotSendSync, BindGroupDescriptor, BindGroupLayoutDescriptor, Buffer, BufferAsyncError, BufferDescriptor, CommandEncoderDescriptor, CompilationInfo, ComputePassDescriptor, ComputePipelineDescriptor, DeviceDescriptor, Error, ErrorFilter, ImageCopyBuffer, - ImageCopyTexture, Maintain, MaintainResult, MapMode, PipelineLayoutDescriptor, - QuerySetDescriptor, RenderBundleDescriptor, RenderBundleEncoderDescriptor, - RenderPassDescriptor, RenderPipelineDescriptor, RequestAdapterOptions, RequestDeviceError, - SamplerDescriptor, ShaderModuleDescriptor, ShaderModuleDescriptorSpirV, SurfaceTargetUnsafe, - Texture, TextureDescriptor, TextureViewDescriptor, UncapturedErrorHandler, + ImageCopyTexture, Maintain, MaintainResult, MapMode, PipelineCacheDescriptor, + PipelineCacheInitDescriptor, PipelineLayoutDescriptor, QuerySetDescriptor, + RenderBundleDescriptor, RenderBundleEncoderDescriptor, RenderPassDescriptor, + RenderPipelineDescriptor, RequestAdapterOptions, RequestDeviceError, SamplerDescriptor, + ShaderModuleDescriptor, ShaderModuleDescriptorSpirV, SurfaceTargetUnsafe, Texture, + TextureDescriptor, TextureViewDescriptor, UncapturedErrorHandler, }; /// Meta trait for an id tracked by a context. 
@@ -59,6 +60,8 @@ pub trait Context: Debug + WasmNotSendSync + Sized { type RenderPipelineData: ContextData; type ComputePipelineId: ContextId + WasmNotSendSync; type ComputePipelineData: ContextData; + type PipelineCacheId: ContextId + WasmNotSendSync; + type PipelineCacheData: ContextData; type CommandEncoderId: ContextId + WasmNotSendSync; type CommandEncoderData: ContextData; type ComputePassId: ContextId; @@ -233,6 +236,18 @@ pub trait Context: Debug + WasmNotSendSync + Sized { device_data: &Self::DeviceData, desc: &ComputePipelineDescriptor<'_>, ) -> (Self::ComputePipelineId, Self::ComputePipelineData); + unsafe fn device_create_pipeline_cache_init( + &self, + device: &Self::DeviceId, + device_data: &Self::DeviceData, + desc: &PipelineCacheInitDescriptor<'_>, + ) -> Option<(Self::PipelineCacheId, Self::PipelineCacheData)>; + fn device_create_pipeline_cache( + &self, + device: &Self::DeviceId, + device_data: &Self::DeviceData, + desc: &PipelineCacheDescriptor<'_>, + ) -> Option<(Self::PipelineCacheId, Self::PipelineCacheData)>; fn device_create_buffer( &self, device: &Self::DeviceId, @@ -395,6 +410,11 @@ pub trait Context: Debug + WasmNotSendSync + Sized { pipeline: &Self::RenderPipelineId, pipeline_data: &Self::RenderPipelineData, ); + fn pipeline_cache_drop( + &self, + cache: &Self::PipelineCacheId, + cache_data: &Self::PipelineCacheData, + ); fn compute_pipeline_get_bind_group_layout( &self, @@ -1271,6 +1291,18 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { device_data: &crate::Data, desc: &ComputePipelineDescriptor<'_>, ) -> (ObjectId, Box); + unsafe fn device_create_pipeline_cache_init( + &self, + device: &ObjectId, + device_data: &crate::Data, + desc: &PipelineCacheInitDescriptor<'_>, + ) -> Option<(ObjectId, Box)>; + fn device_create_pipeline_cache( + &self, + device: &ObjectId, + device_data: &crate::Data, + desc: &PipelineCacheDescriptor<'_>, + ) -> Option<(ObjectId, Box)>; fn device_create_buffer( &self, device: &ObjectId, @@ -1391,6 
+1423,7 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { fn render_bundle_drop(&self, render_bundle: &ObjectId, render_bundle_data: &crate::Data); fn compute_pipeline_drop(&self, pipeline: &ObjectId, pipeline_data: &crate::Data); fn render_pipeline_drop(&self, pipeline: &ObjectId, pipeline_data: &crate::Data); + fn pipeline_cache_drop(&self, cache: &ObjectId, _cache_data: &crate::Data); fn compute_pipeline_get_bind_group_layout( &self, @@ -2297,6 +2330,33 @@ where (compute_pipeline.into(), Box::new(data) as _) } + unsafe fn device_create_pipeline_cache_init( + &self, + device: &ObjectId, + device_data: &crate::Data, + desc: &PipelineCacheInitDescriptor<'_>, + ) -> Option<(ObjectId, Box)> { + let device = ::from(*device); + let device_data = downcast_ref(device_data); + let (pipeline_cache, data) = unsafe { + Context::device_create_pipeline_cache_init(self, &device, device_data, desc) + }?; + Some((pipeline_cache.into(), Box::new(data) as _)) + } + + fn device_create_pipeline_cache( + &self, + device: &ObjectId, + device_data: &crate::Data, + desc: &PipelineCacheDescriptor<'_>, + ) -> Option<(ObjectId, Box)> { + let device = ::from(*device); + let device_data = downcast_ref(device_data); + let (pipeline_cache, data) = + Context::device_create_pipeline_cache(self, &device, device_data, desc)?; + Some((pipeline_cache.into(), Box::new(data) as _)) + } + fn device_create_buffer( &self, device: &ObjectId, @@ -2621,6 +2681,12 @@ where Context::render_pipeline_drop(self, &pipeline, pipeline_data) } + fn pipeline_cache_drop(&self, cache: &ObjectId, cache_data: &crate::Data) { + let cache = ::from(*cache); + let cache_data = downcast_ref(cache_data); + Context::pipeline_cache_drop(self, &cache, cache_data) + } + fn compute_pipeline_get_bind_group_layout( &self, pipeline: &ObjectId, diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 3f64261a6d..a86a4441f7 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1111,6 +1111,39 @@ impl ComputePipeline { } } +/// 
Handle to a pipeline cache, which is used to accelerate +/// creating [`RenderPipeline`]s and [`ComputePipeline`]s +/// in subsequent executions +/// +/// This type is unique to the Rust API of `wgpu`. +#[derive(Debug)] +pub struct PipelineCache { + context: Arc, + id: ObjectId, + data: Box, +} + +#[cfg(send_sync)] +static_assertions::assert_impl_all!(PipelineCache: Send, Sync); + +impl PipelineCache { + /// Get the data associated with this pipeline cache. + /// The format is unspecified, and should be passed to a call to + /// [`Device::create_pipeline_cache`] for a compatible device. + pub fn get_data() -> Option> { + None + } +} + +impl Drop for PipelineCache { + fn drop(&mut self) { + if !thread::panicking() { + self.context + .pipeline_cache_drop(&self.id, self.data.as_ref()); + } + } +} + /// Handle to a command buffer on the GPU. /// /// A `CommandBuffer` represents a complete sequence of commands that may be submitted to a command @@ -1939,6 +1972,21 @@ pub struct ComputePipelineDescriptor<'a> { #[cfg(send_sync)] static_assertions::assert_impl_all!(ComputePipelineDescriptor<'_>: Send, Sync); +#[derive(Clone, Debug)] +pub struct PipelineCacheInitDescriptor<'a> { + pub label: Label<'a>, + pub data: &'a [u8], +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(PipelineCacheInitDescriptor<'_>: Send, Sync); + +#[derive(Clone, Debug)] +pub struct PipelineCacheDescriptor<'a> { + pub label: Label<'a>, +} +#[cfg(send_sync)] +static_assertions::assert_impl_all!(PipelineCacheDescriptor<'_>: Send, Sync); + pub use wgt::ImageCopyBuffer as ImageCopyBufferBase; /// View of a buffer which can be used to copy to/from a texture. 
/// @@ -3086,6 +3134,43 @@ impl Device { pub fn make_invalid(&self) { DynContext::device_make_invalid(&*self.context, &self.id, self.data.as_ref()) } + + pub unsafe fn create_pipeline_cache_init( + &self, + desc: &PipelineCacheInitDescriptor<'_>, + // TODO: Work out error handling and conditions + ) -> Option { + let (id, data) = unsafe { + DynContext::device_create_pipeline_cache_init( + &*self.context, + &self.id, + self.data.as_ref(), + desc, + ) + }?; + Some(PipelineCache { + context: Arc::clone(&self.context), + id, + data, + }) + } + + pub fn create_pipeline_cache( + &self, + desc: &PipelineCacheDescriptor<'_>, + ) -> Option { + let (id, data) = DynContext::device_create_pipeline_cache( + &*self.context, + &self.id, + self.data.as_ref(), + desc, + )?; + Some(PipelineCache { + context: Arc::clone(&self.context), + id, + data, + }) + } } impl Drop for Device { From ef88b7c5eb7a3fe486a866ede7a2de2660a766aa Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 27 Feb 2024 14:42:32 +0000 Subject: [PATCH 02/31] Introduce the cache API to the frontend Fix some CI issues --- deno_webgpu/pipeline.rs | 2 ++ examples/src/boids/mod.rs | 1 + examples/src/bunnymark/mod.rs | 1 + examples/src/conservative_raster/mod.rs | 4 ++++ examples/src/cube/mod.rs | 2 ++ examples/src/hello_triangle/mod.rs | 1 + examples/src/mipmap/mod.rs | 2 ++ examples/src/msaa_line/mod.rs | 1 + examples/src/render_to_texture/mod.rs | 1 + examples/src/shadow/mod.rs | 2 ++ examples/src/skybox/mod.rs | 2 ++ examples/src/srgb_blend/mod.rs | 1 + examples/src/stencil_triangles/mod.rs | 2 ++ examples/src/texture_arrays/mod.rs | 1 + examples/src/timestamp_queries/mod.rs | 2 +- examples/src/uniform_values/mod.rs | 2 +- examples/src/water/mod.rs | 3 +++ tests/tests/bgra8unorm_storage.rs | 1 + tests/tests/device.rs | 1 + tests/tests/mem_leaks.rs | 1 + tests/tests/nv12_texture/mod.rs | 1 + tests/tests/occlusion_query/mod.rs | 1 + tests/tests/regression/issue_3349.rs | 
1 + tests/tests/regression/issue_3457.rs | 2 ++ tests/tests/scissor_tests/mod.rs | 1 + tests/tests/shader_primitive_index/mod.rs | 1 + tests/tests/shader_view_format/mod.rs | 1 + tests/tests/vertex_indices/mod.rs | 1 + wgpu-core/src/device/resource.rs | 5 +++-- wgpu-core/src/pipeline.rs | 4 +++- wgpu-hal/src/dx12/device.rs | 8 ++++++++ wgpu-hal/src/dx12/mod.rs | 1 + wgpu-hal/src/metal/device.rs | 8 ++++++++ wgpu-hal/src/metal/mod.rs | 1 + wgpu-types/src/lib.rs | 2 +- wgpu/src/backend/wgpu_core.rs | 2 ++ wgpu/src/lib.rs | 2 ++ 37 files changed, 69 insertions(+), 6 deletions(-) diff --git a/deno_webgpu/pipeline.rs b/deno_webgpu/pipeline.rs index e8b5a71cf0..b4d2f8d36e 100644 --- a/deno_webgpu/pipeline.rs +++ b/deno_webgpu/pipeline.rs @@ -115,6 +115,7 @@ pub fn op_webgpu_create_compute_pipeline( constants: Cow::Owned(compute.constants), zero_initialize_workgroup_memory: true, }, + cache: None, }; let implicit_pipelines = match layout { GPUPipelineLayoutOrGPUAutoLayoutMode::Layout(_) => None, @@ -395,6 +396,7 @@ pub fn op_webgpu_create_render_pipeline( multisample: args.multisample, fragment, multiview: None, + cache: None, }; let implicit_pipelines = match args.layout { diff --git a/examples/src/boids/mod.rs b/examples/src/boids/mod.rs index 6c8bb6e76c..67c69d349b 100644 --- a/examples/src/boids/mod.rs +++ b/examples/src/boids/mod.rs @@ -156,6 +156,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); // create compute pipeline diff --git a/examples/src/bunnymark/mod.rs b/examples/src/bunnymark/mod.rs index 679fc5014a..b5b33b54d5 100644 --- a/examples/src/bunnymark/mod.rs +++ b/examples/src/bunnymark/mod.rs @@ -224,6 +224,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let texture = { diff --git a/examples/src/conservative_raster/mod.rs 
b/examples/src/conservative_raster/mod.rs index 89500a798f..116ed8623b 100644 --- a/examples/src/conservative_raster/mod.rs +++ b/examples/src/conservative_raster/mod.rs @@ -113,6 +113,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let pipeline_triangle_regular = @@ -135,6 +136,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let pipeline_lines = if device @@ -165,6 +167,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }), ) } else { @@ -224,6 +227,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }), bind_group_layout, ) diff --git a/examples/src/cube/mod.rs b/examples/src/cube/mod.rs index 9347627812..9828157e57 100644 --- a/examples/src/cube/mod.rs +++ b/examples/src/cube/mod.rs @@ -260,6 +260,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let pipeline_wire = if device @@ -301,6 +302,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); Some(pipeline_wire) } else { diff --git a/examples/src/hello_triangle/mod.rs b/examples/src/hello_triangle/mod.rs index 79162a6956..e4d42674f7 100644 --- a/examples/src/hello_triangle/mod.rs +++ b/examples/src/hello_triangle/mod.rs @@ -72,6 +72,7 @@ async fn run(event_loop: EventLoop<()>, window: Window) { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let mut config = surface diff --git a/examples/src/mipmap/mod.rs b/examples/src/mipmap/mod.rs index 
0848e94e10..eaed9c82e7 100644 --- a/examples/src/mipmap/mod.rs +++ b/examples/src/mipmap/mod.rs @@ -109,6 +109,7 @@ impl Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let bind_group_layout = pipeline.get_bind_group_layout(0); @@ -310,6 +311,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); // Create bind group diff --git a/examples/src/msaa_line/mod.rs b/examples/src/msaa_line/mod.rs index cd22e75bc4..46bb743e99 100644 --- a/examples/src/msaa_line/mod.rs +++ b/examples/src/msaa_line/mod.rs @@ -78,6 +78,7 @@ impl Example { ..Default::default() }, multiview: None, + cache: None, }); let mut encoder = device.create_render_bundle_encoder(&wgpu::RenderBundleEncoderDescriptor { diff --git a/examples/src/render_to_texture/mod.rs b/examples/src/render_to_texture/mod.rs index 5e571dc74e..caed736741 100644 --- a/examples/src/render_to_texture/mod.rs +++ b/examples/src/render_to_texture/mod.rs @@ -72,6 +72,7 @@ async fn run(_path: Option) { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); log::info!("Wgpu context set up."); diff --git a/examples/src/shadow/mod.rs b/examples/src/shadow/mod.rs index 2cb6d6f3e2..b2c27f5892 100644 --- a/examples/src/shadow/mod.rs +++ b/examples/src/shadow/mod.rs @@ -526,6 +526,7 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); Pass { @@ -660,6 +661,7 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); Pass { diff --git a/examples/src/skybox/mod.rs b/examples/src/skybox/mod.rs index 35a4266d20..e526feedae 100644 --- a/examples/src/skybox/mod.rs +++ b/examples/src/skybox/mod.rs @@ -221,6 +221,7 @@ impl crate::framework::Example for Example { }), 
multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let entity_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { label: Some("Entity"), @@ -254,6 +255,7 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let sampler = device.create_sampler(&wgpu::SamplerDescriptor { diff --git a/examples/src/srgb_blend/mod.rs b/examples/src/srgb_blend/mod.rs index f701aff989..314fc92df2 100644 --- a/examples/src/srgb_blend/mod.rs +++ b/examples/src/srgb_blend/mod.rs @@ -151,6 +151,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); // Done diff --git a/examples/src/stencil_triangles/mod.rs b/examples/src/stencil_triangles/mod.rs index e0f495177f..8d638d20d1 100644 --- a/examples/src/stencil_triangles/mod.rs +++ b/examples/src/stencil_triangles/mod.rs @@ -106,6 +106,7 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let outer_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { @@ -141,6 +142,7 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let stencil_buffer = device.create_texture(&wgpu::TextureDescriptor { diff --git a/examples/src/texture_arrays/mod.rs b/examples/src/texture_arrays/mod.rs index dd7b4ec89a..b0f474b957 100644 --- a/examples/src/texture_arrays/mod.rs +++ b/examples/src/texture_arrays/mod.rs @@ -341,6 +341,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None }); Self { diff --git a/examples/src/timestamp_queries/mod.rs b/examples/src/timestamp_queries/mod.rs index 7042d60fe9..0d8345ddfa 100644 --- a/examples/src/timestamp_queries/mod.rs 
+++ b/examples/src/timestamp_queries/mod.rs @@ -366,8 +366,8 @@ fn render_pass( depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); - let render_target = device.create_texture(&wgpu::TextureDescriptor { label: Some("rendertarget"), size: wgpu::Extent3d { diff --git a/examples/src/uniform_values/mod.rs b/examples/src/uniform_values/mod.rs index 932c7aaeec..c53a189722 100644 --- a/examples/src/uniform_values/mod.rs +++ b/examples/src/uniform_values/mod.rs @@ -192,8 +192,8 @@ impl WgpuContext { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); - let surface_config = surface .get_default_config(&adapter, size.width, size.height) .unwrap(); diff --git a/examples/src/water/mod.rs b/examples/src/water/mod.rs index 94f12895a8..2aefa85c6b 100644 --- a/examples/src/water/mod.rs +++ b/examples/src/water/mod.rs @@ -574,6 +574,8 @@ impl crate::framework::Example for Example { // No multisampling is used. multisample: wgpu::MultisampleState::default(), multiview: None, + // Pipeline caching is not used + cache: None, }); // Same idea as the water pipeline. @@ -610,6 +612,7 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None }); // A render bundle to draw the terrain. 
diff --git a/tests/tests/bgra8unorm_storage.rs b/tests/tests/bgra8unorm_storage.rs index 17082a9ed4..7bc117f097 100644 --- a/tests/tests/bgra8unorm_storage.rs +++ b/tests/tests/bgra8unorm_storage.rs @@ -98,6 +98,7 @@ static BGRA8_UNORM_STORAGE: GpuTestConfiguration = GpuTestConfiguration::new() entry_point: "main", compilation_options: Default::default(), module: &module, + cache: None, }); let mut encoder = diff --git a/tests/tests/device.rs b/tests/tests/device.rs index 649a850fa9..be3d3757ae 100644 --- a/tests/tests/device.rs +++ b/tests/tests/device.rs @@ -488,6 +488,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne multisample: wgpu::MultisampleState::default(), fragment: None, multiview: None, + cache: None, }); }); diff --git a/tests/tests/mem_leaks.rs b/tests/tests/mem_leaks.rs index 7002ebabe0..3c59aec036 100644 --- a/tests/tests/mem_leaks.rs +++ b/tests/tests/mem_leaks.rs @@ -113,6 +113,7 @@ async fn draw_test_with_reports( })], }), multiview: None, + cache: None, }); let global_report = ctx.instance.generate_report().unwrap(); diff --git a/tests/tests/nv12_texture/mod.rs b/tests/tests/nv12_texture/mod.rs index 70ee849831..fa386f8653 100644 --- a/tests/tests/nv12_texture/mod.rs +++ b/tests/tests/nv12_texture/mod.rs @@ -41,6 +41,7 @@ static NV12_TEXTURE_CREATION_SAMPLING: GpuTestConfiguration = GpuTestConfigurati depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let tex = ctx.device.create_texture(&wgpu::TextureDescriptor { diff --git a/tests/tests/occlusion_query/mod.rs b/tests/tests/occlusion_query/mod.rs index 1a68ecf79d..a888320e28 100644 --- a/tests/tests/occlusion_query/mod.rs +++ b/tests/tests/occlusion_query/mod.rs @@ -51,6 +51,7 @@ static OCCLUSION_QUERY: GpuTestConfiguration = GpuTestConfiguration::new() }), multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); // Create occlusion query set diff --git 
a/tests/tests/regression/issue_3349.rs b/tests/tests/regression/issue_3349.rs index 74c466b45a..35d35e5bdf 100644 --- a/tests/tests/regression/issue_3349.rs +++ b/tests/tests/regression/issue_3349.rs @@ -119,6 +119,7 @@ async fn multi_stage_data_binding_test(ctx: TestingContext) { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let texture = ctx.device.create_texture(&wgpu::TextureDescriptor { diff --git a/tests/tests/regression/issue_3457.rs b/tests/tests/regression/issue_3457.rs index f18d681ae1..f0f7e64636 100644 --- a/tests/tests/regression/issue_3457.rs +++ b/tests/tests/regression/issue_3457.rs @@ -80,6 +80,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = })], }), multiview: None, + cache: None, }); let single_pipeline = ctx @@ -111,6 +112,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = })], }), multiview: None, + cache: None, }); let view = ctx diff --git a/tests/tests/scissor_tests/mod.rs b/tests/tests/scissor_tests/mod.rs index 15c35644e5..3f1e7df135 100644 --- a/tests/tests/scissor_tests/mod.rs +++ b/tests/tests/scissor_tests/mod.rs @@ -61,6 +61,7 @@ async fn scissor_test_impl( })], }), multiview: None, + cache: None, }); let readback_buffer = image::ReadbackBuffers::new(&ctx.device, &texture); diff --git a/tests/tests/shader_primitive_index/mod.rs b/tests/tests/shader_primitive_index/mod.rs index fb43397830..9972f81aa1 100644 --- a/tests/tests/shader_primitive_index/mod.rs +++ b/tests/tests/shader_primitive_index/mod.rs @@ -147,6 +147,7 @@ async fn pulling_common( })], }), multiview: None, + cache: None, }); let width = 2; diff --git a/tests/tests/shader_view_format/mod.rs b/tests/tests/shader_view_format/mod.rs index 53c642bf7a..d34b8d851d 100644 --- a/tests/tests/shader_view_format/mod.rs +++ b/tests/tests/shader_view_format/mod.rs @@ -109,6 +109,7 @@ async fn reinterpret( depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, 
}); let bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { layout: &pipeline.get_bind_group_layout(0), diff --git a/tests/tests/vertex_indices/mod.rs b/tests/tests/vertex_indices/mod.rs index cad7e731d1..7bd172d850 100644 --- a/tests/tests/vertex_indices/mod.rs +++ b/tests/tests/vertex_indices/mod.rs @@ -295,6 +295,7 @@ async fn vertex_index_common(ctx: TestingContext) { })], }), multiview: None, + cache: None, }; let builtin_pipeline = ctx.device.create_render_pipeline(&pipeline_desc); pipeline_desc.vertex.entry_point = "vs_main_buffers"; diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 81f775f646..a6e3c8d998 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -3497,11 +3497,12 @@ impl Device { data: desc.data.as_deref(), label: desc.label.to_hal(self.instance_flags), }; - let raw = unsafe { (&self.raw.as_ref().unwrap()).create_pipeline_cache(&cache_desc) }; + let raw = unsafe { (&self.raw.as_ref().unwrap()).create_pipeline_cache(&cache_desc) }?; let cache = pipeline::PipelineCache { device: self.clone(), info: ResourceInfo::new(desc.label.borrow_or_default()), - raw, + // This would be none in the error condition, which we don't implement yet + raw: Some(raw), }; Some(cache) } diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index 9beff60d6a..67d6852cb1 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -5,7 +5,7 @@ use crate::{ command::ColorAttachmentError, device::{Device, DeviceError, MissingDownlevelFlags, MissingFeatures, RenderPassContext}, hal_api::HalApi, - id::{PipelineLayoutId, ShaderModuleId}, + id::{PipelineCacheId, PipelineLayoutId, ShaderModuleId}, resource::{Resource, ResourceInfo, ResourceType}, resource_log, validation, Label, }; @@ -192,6 +192,7 @@ pub struct ComputePipelineDescriptor<'a> { pub layout: Option, /// The compiled compute stage and its entry point. 
pub stage: ProgrammableStageDescriptor<'a>, + pub cache: Option, } #[derive(Clone, Debug, Error)] @@ -354,6 +355,7 @@ pub struct RenderPipelineDescriptor<'a> { /// If the pipeline will be used with a multiview render pass, this indicates how many array /// layers the attachments will have. pub multiview: Option, + pub cache: Option, } #[derive(Clone, Debug)] diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 82075294ee..86709b2434 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1517,6 +1517,14 @@ impl crate::Device for super::Device { } unsafe fn destroy_compute_pipeline(&self, _pipeline: super::ComputePipeline) {} + unsafe fn create_pipeline_cache( + &self, + desc: &crate::PipelineCacheDescriptor<'_>, + ) -> Option<()> { + None + } + unsafe fn destroy_pipeline_cache(&self, (): ()) {} + unsafe fn create_query_set( &self, desc: &wgt::QuerySetDescriptor, diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 9f021bc241..906264ae36 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -82,6 +82,7 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + type PipelineCache = (); type AccelerationStructure = AccelerationStructure; } diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 2c8f5a2bfb..96f03aa66f 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1099,6 +1099,14 @@ impl crate::Device for super::Device { } unsafe fn destroy_compute_pipeline(&self, _pipeline: super::ComputePipeline) {} + unsafe fn create_pipeline_cache( + &self, + desc: &crate::PipelineCacheDescriptor<'_>, + ) -> Option<()> { + None + } + unsafe fn destroy_pipeline_cache(&self, (): ()) {} + unsafe fn create_query_set( &self, desc: &wgt::QuerySetDescriptor, diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 7d547cfe3c..a5ea63b035 100644 --- 
a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -66,6 +66,7 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + type PipelineCache = (); type AccelerationStructure = AccelerationStructure; } diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 1c2107e46c..b88546ee48 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -1749,7 +1749,7 @@ pub struct AdapterInfo { } impl AdapterInfo { - /// A recommended filename for storing the pipline cache of this adapter + /// A recommended filename for storing the pipeline cache of this adapter /// /// Each adapter may have a different filename, to allow using multiple caches pub fn pipeline_cache_key(&self) -> Option { diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 2e1f15edc0..b88f00fd25 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -1185,6 +1185,7 @@ impl crate::Context for ContextWgpuCore { targets: Borrowed(frag.targets), }), multiview: desc.multiview, + cache: desc.cache.map(|c| c.id.into()), }; let (id, error) = wgc::gfx_select!(device => self.0.device_create_render_pipeline( @@ -1234,6 +1235,7 @@ impl crate::Context for ContextWgpuCore { .compilation_options .zero_initialize_workgroup_memory, }, + cache: desc.cache.map(|c| c.id.into()), }; let (id, error) = wgc::gfx_select!(device => self.0.device_create_compute_pipeline( diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index a86a4441f7..ee956ce3e2 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1871,6 +1871,7 @@ pub struct RenderPipelineDescriptor<'a> { /// If the pipeline will be used with a multiview render pass, this indicates how many array /// layers the attachments will have. 
pub multiview: Option, + pub cache: Option<&'a PipelineCache>, } #[cfg(send_sync)] static_assertions::assert_impl_all!(RenderPipelineDescriptor<'_>: Send, Sync); @@ -1964,6 +1965,7 @@ pub struct ComputePipelineDescriptor<'a> { /// The name of the entry point in the compiled shader. There must be a function with this name /// and no return value in the shader. pub entry_point: &'a str, + pub cache: Option<&'a PipelineCache>, /// Advanced options for when this pipeline is compiled /// /// This implements `Default`, and for most users can be set to `Default::default()` From 7edb08b6bf1f90c5a4cdc822415ab750722a8dbd Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 27 Feb 2024 16:17:01 +0000 Subject: [PATCH 03/31] Support using a pipeline cache when creating a pipeline Temp: Start wiring up data access --- wgpu-core/src/device/resource.rs | 34 +++++++++++++++++++ wgpu-hal/examples/halmark/main.rs | 1 + wgpu-hal/examples/ray-traced-triangle/main.rs | 1 + wgpu-hal/src/lib.rs | 2 ++ wgpu/src/backend/wgpu_core.rs | 8 +++++ wgpu/src/context.rs | 22 ++++++++++++ wgpu/src/lib.rs | 5 +-- 7 files changed, 71 insertions(+), 2 deletions(-) diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index a6e3c8d998..1ec446bbd4 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -2736,6 +2736,7 @@ impl Device { implicit_context: Option, hub: &Hub, ) -> Result, pipeline::CreateComputePipelineError> { + {} // This has to be done first, or otherwise the IDs may be pointing to entries // that are not even in the storage. 
if let Some(ref ids) = implicit_context { @@ -2822,6 +2823,20 @@ impl Device { let late_sized_buffer_groups = Device::make_late_sized_buffer_groups(&shader_binding_sizes, &pipeline_layout); + let cache = if let Some(cache) = desc.cache { + let cache = hub + .pipeline_caches + .get(cache) + .map_err(|_| validation::StageError::InvalidModule)?; + + if cache.device.as_info().id() != self.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + Some(cache) + } else { + None + }; + let pipeline_desc = hal::ComputePipelineDescriptor { label: desc.label.to_hal(self.instance_flags), layout: pipeline_layout.raw(), @@ -2831,6 +2846,7 @@ impl Device { constants: desc.stage.constants.as_ref(), zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, }, + cache: cache.as_ref().map(|it| it.raw.as_ref()).flatten(), }; let raw = unsafe { @@ -3395,6 +3411,23 @@ impl Device { } } + let cache = if let Some(cache) = desc.cache { + let cache = hub + .pipeline_caches + .get(cache) + // This is clearly wrong, but I'm just trying to fix the type errors + .map_err(|_| { + pipeline::CreateRenderPipelineError::ConservativeRasterizationNonFillPolygonMode + })?; + + if cache.device.as_info().id() != self.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + Some(cache) + } else { + None + }; + let late_sized_buffer_groups = Device::make_late_sized_buffer_groups(&shader_binding_sizes, &pipeline_layout); @@ -3409,6 +3442,7 @@ impl Device { fragment_stage, color_targets, multiview: desc.multiview, + cache: cache.as_ref().map(|it| it.raw.as_ref()).flatten(), }; let raw = unsafe { self.raw diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index aef6919c8f..ee59fa2590 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -274,6 +274,7 @@ impl Example { write_mask: wgt::ColorWrites::default(), })], multiview: None, + cache: None, }; let pipeline = unsafe { 
device.create_render_pipeline(&pipeline_desc).unwrap() }; diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 3985cd60af..8f404dc4d2 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -374,6 +374,7 @@ impl Example { constants: &Default::default(), zero_initialize_workgroup_memory: true, }, + cache: None, }) } .unwrap(); diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 8f1b214e4d..b68bd7f09d 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1642,6 +1642,7 @@ pub struct ComputePipelineDescriptor<'a, A: Api> { pub layout: &'a A::PipelineLayout, /// The compiled compute stage and its entry point. pub stage: ProgrammableStage<'a, A>, + pub cache: Option<&'a A::PipelineCache>, } pub struct PipelineCacheDescriptor<'a> { @@ -1683,6 +1684,7 @@ pub struct RenderPipelineDescriptor<'a, A: Api> { /// If the pipeline will be used with a multiview render pass, this indicates how many array /// layers the attachments will have. 
pub multiview: Option, + pub cache: Option<&'a A::PipelineCache>, } #[derive(Debug, Clone)] diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index b88f00fd25..1a013bddc7 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -2387,6 +2387,14 @@ impl crate::Context for ContextWgpuCore { wgc::gfx_select!(device => self.0.device_stop_capture(*device)); } + fn pipeline_cache_get_data( + &self, + cache: &Self::PipelineCacheId, + cache_data: &Self::PipelineCacheData, + ) -> Option> { + wgc::gfx_select!(cache => self.0.pipeline_cache_get_data(*cache)); + } + fn compute_pass_set_pipeline( &self, _pass: &mut Self::ComputePassId, diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index ac975add9f..b5f9e2a5f5 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -633,6 +633,12 @@ pub trait Context: Debug + WasmNotSendSync + Sized { fn device_start_capture(&self, device: &Self::DeviceId, device_data: &Self::DeviceData); fn device_stop_capture(&self, device: &Self::DeviceId, device_data: &Self::DeviceData); + fn pipeline_cache_get_data( + &self, + cache: &Self::PipelineCacheId, + cache_data: &Self::PipelineCacheData, + ) -> Option>; + fn compute_pass_set_pipeline( &self, pass: &mut Self::ComputePassId, @@ -1634,6 +1640,12 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { fn device_start_capture(&self, device: &ObjectId, data: &crate::Data); fn device_stop_capture(&self, device: &ObjectId, data: &crate::Data); + fn pipeline_cache_get_data( + &self, + cache: &ObjectId, + cache_data: &crate::Data, + ) -> Option>; + fn compute_pass_set_pipeline( &self, pass: &mut ObjectId, @@ -3149,6 +3161,16 @@ where Context::device_stop_capture(self, &device, device_data) } + fn pipeline_cache_get_data( + &self, + cache: &ObjectId, + cache_data: &crate::Data, + ) -> Option> { + let mut cache = ::from(*cache); + let cache_data = downcast_mut::(cache_data); + Context::pipeline_cache_get_data(self, &mut cache, cache_data) + } + 
fn compute_pass_set_pipeline( &self, pass: &mut ObjectId, diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index ee956ce3e2..3522711160 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1130,8 +1130,9 @@ impl PipelineCache { /// Get the data associated with this pipeline cache. /// The format is unspecified, and should be passed to a call to /// [`Device::create_pipeline_cache`] for a compatible device. - pub fn get_data() -> Option> { - None + pub fn get_data(&self) -> Option> { + self.context + .pipeline_cache_get_data(&mut self.id, &mut self.data) } } From fb56d8c7daca8c17c840820a0e0c78f085082da9 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Wed, 28 Feb 2024 09:58:12 +0000 Subject: [PATCH 04/31] Finish implementing the anemic version of pipeline caching Probably fix incorrect reference --- wgpu-core/src/device/global.rs | 22 ++++++++++++++++++++-- wgpu-hal/src/lib.rs | 5 +++++ wgpu-hal/src/vulkan/device.rs | 5 +++++ wgpu/src/backend/wgpu_core.rs | 3 ++- wgpu/src/context.rs | 2 +- wgpu/src/lib.rs | 2 +- 6 files changed, 34 insertions(+), 5 deletions(-) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 83777edf20..76bf37b085 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -13,8 +13,10 @@ use crate::{ instance::{self, Adapter, Surface}, lock::{rank, RwLock}, pipeline, present, - resource::{self, BufferAccessResult}, - resource::{BufferAccessError, BufferMapOperation, CreateBufferError, Resource}, + resource::{ + self, BufferAccessError, BufferAccessResult, BufferMapOperation, CreateBufferError, + Resource, + }, validation::check_buffer_usage, Label, LabelHelpers as _, }; @@ -2317,6 +2319,22 @@ impl Global { .force_replace_with_error(device_id, "Made invalid."); } + pub fn pipeline_cache_get_data(&self, id: id::PipelineCacheId) -> Option> { + api_log!("PipelineCache::get_data"); + let hub = A::hub(self); + + if let Ok(cache) = 
hub.pipeline_caches.get(id) { + // TODO: Is this check needed? + if !cache.device.is_valid() { + return None; + } + if let Some(raw_cache) = cache.raw.as_ref() { + return unsafe { cache.device.raw().pipeline_cache_get_data(raw_cache) }; + } + } + None + } + pub fn device_drop(&self, device_id: DeviceId) { profiling::scope!("Device::drop"); api_log!("Device::drop {device_id:?}"); diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index b68bd7f09d..07f1d9760e 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -658,6 +658,11 @@ pub trait Device: WasmNotSendSync { unsafe fn start_capture(&self) -> bool; unsafe fn stop_capture(&self); + #[allow(unused_variables)] + unsafe fn pipeline_cache_get_data(&self, cache: &A::PipelineCache) -> Option> { + None + } + unsafe fn create_acceleration_structure( &self, desc: &AccelerationStructureDescriptor, diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index b9a98fce6c..5706861de9 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -2132,6 +2132,11 @@ impl crate::Device for super::Device { } } + unsafe fn pipeline_cache_get_data(&self, cache: &PipelineCache) -> Option> { + let data = unsafe { self.raw_device().get_pipeline_cache_data(cache.raw) }; + data.ok() + } + unsafe fn get_acceleration_structure_build_sizes<'a>( &self, desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>, diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 1a013bddc7..4e26ffbaaa 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -2390,9 +2390,10 @@ impl crate::Context for ContextWgpuCore { fn pipeline_cache_get_data( &self, cache: &Self::PipelineCacheId, + // TODO: Used for error handling? 
cache_data: &Self::PipelineCacheData, ) -> Option> { - wgc::gfx_select!(cache => self.0.pipeline_cache_get_data(*cache)); + wgc::gfx_select!(cache => self.0.pipeline_cache_get_data(*cache)) } fn compute_pass_set_pipeline( diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index b5f9e2a5f5..85ac77ccf3 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -3167,7 +3167,7 @@ where cache_data: &crate::Data, ) -> Option> { let mut cache = ::from(*cache); - let cache_data = downcast_mut::(cache_data); + let cache_data = downcast_ref::(cache_data); Context::pipeline_cache_get_data(self, &mut cache, cache_data) } diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 3522711160..29e400f0ce 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1132,7 +1132,7 @@ impl PipelineCache { /// [`Device::create_pipeline_cache`] for a compatible device. pub fn get_data(&self) -> Option> { self.context - .pipeline_cache_get_data(&mut self.id, &mut self.data) + .pipeline_cache_get_data(&self.id, self.data.as_ref()) } } From 1b53496163f15ff5b7e25c0eeeb30c9e2e22fd98 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Wed, 28 Feb 2024 15:39:03 +0000 Subject: [PATCH 05/31] Actually pass the cache to the data --- wgpu-hal/src/vulkan/device.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 5706861de9..8594b002c0 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1868,7 +1868,13 @@ impl crate::Device for super::Device { unsafe { self.shared .raw - .create_graphics_pipelines(vk::PipelineCache::null(), &vk_infos, None) + .create_graphics_pipelines( + desc.cache + .map(|it| it.raw) + .unwrap_or(vk::PipelineCache::null()), + &vk_infos, + None, + ) .map_err(|(_, e)| crate::DeviceError::from(e)) }? 
}; @@ -1920,7 +1926,13 @@ impl crate::Device for super::Device { unsafe { self.shared .raw - .create_compute_pipelines(vk::PipelineCache::null(), &vk_infos, None) + .create_compute_pipelines( + desc.cache + .map(|it| it.raw) + .unwrap_or(vk::PipelineCache::null()), + &vk_infos, + None, + ) .map_err(|(_, e)| crate::DeviceError::from(e)) }? }; From 2e0e61403ee74a19696b80e5cff3dea266435522 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 29 Feb 2024 10:55:05 +0000 Subject: [PATCH 06/31] Move `pipeline_cache_key` to `wgpu::util` Remove optionality Resolve the real CI issues --- wgpu-core/src/device/global.rs | 53 +++++++++------ wgpu-core/src/device/resource.rs | 22 ++++-- wgpu-core/src/pipeline.rs | 23 +++++++ wgpu-hal/src/dx12/device.rs | 4 +- wgpu-hal/src/empty.rs | 4 +- wgpu-hal/src/gles/device.rs | 9 ++- wgpu-hal/src/lib.rs | 10 ++- wgpu-hal/src/metal/device.rs | 4 +- wgpu-hal/src/vulkan/device.rs | 12 ++-- wgpu-types/src/lib.rs | 19 ------ wgpu/src/backend/wgpu_core.rs | 39 ++++++++--- wgpu/src/context.rs | 27 ++++---- wgpu/src/lib.rs | 113 +++++++++++++++++++++++++++---- wgpu/src/util/mod.rs | 46 +++++++++++++ 14 files changed, 287 insertions(+), 98 deletions(-) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 76bf37b085..1526f747aa 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -1827,38 +1827,53 @@ impl Global { } } + /// # Safety + /// The `data` argument of `desc` must have been returned by + /// [Self::pipeline_cache_get_data] for the same adapter pub unsafe fn device_create_pipeline_cache( &self, device_id: DeviceId, desc: &pipeline::PipelineCacheDescriptor<'_>, id_in: Option, - ) -> Option { + ) -> ( + id::PipelineCacheId, + Option, + ) { profiling::scope!("Device::create_pipeline_cache"); let hub = A::hub(self); let fid = hub.pipeline_caches.prepare(id_in); - let device = match hub.devices.get(device_id) { - Ok(device) => device, - // 
TODO: Handle error properly - Err(_) => return None, + let error: pipeline::CreatePipelineCacheError = 'error: { + let device = match hub.devices.get(device_id) { + Ok(device) => device, + // TODO: Handle error properly + Err(crate::storage::InvalidId) => break 'error DeviceError::Invalid.into(), + }; + if !device.is_valid() { + break 'error DeviceError::Lost.into(); + } + #[cfg(feature = "trace")] + if let Some(ref mut trace) = *device.trace.lock() { + trace.add(trace::Action::CreatePipelineCache { + id: fid.id(), + desc: desc.clone(), + }); + } + let cache = unsafe { device.create_pipeline_cache(desc) }; + match cache { + Ok(cache) => { + let (id, _) = fid.assign(cache); + api_log!("Device::create_pipeline_cache -> {id:?}"); + return (id, None); + } + Err(e) => break 'error e, + } }; - if !device.is_valid() { - return None; - } - #[cfg(feature = "trace")] - if let Some(ref mut trace) = *device.trace.lock() { - trace.add(trace::Action::CreatePipelineCache { - id: fid.id(), - desc: desc.clone(), - }); - } - let pipeline = unsafe { device.create_pipeline_cache(desc) }?; - let (id, _) = fid.assign(pipeline); - api_log!("Device::create_pipeline_cache -> {id:?}"); + let id = fid.assign_error(desc.label.borrow_or_default()); - Some(id) + (id, Some(error)) } pub fn pipeline_cache_drop(&self, pipeline_cache_id: id::PipelineCacheId) { diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 1ec446bbd4..ce6d9d97c0 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -2736,7 +2736,6 @@ impl Device { implicit_context: Option, hub: &Hub, ) -> Result, pipeline::CreateComputePipelineError> { - {} // This has to be done first, or otherwise the IDs may be pointing to entries // that are not even in the storage. 
if let Some(ref ids) = implicit_context { @@ -2846,7 +2845,7 @@ impl Device { constants: desc.stage.constants.as_ref(), zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, }, - cache: cache.as_ref().map(|it| it.raw.as_ref()).flatten(), + cache: cache.as_ref().and_then(|it| it.raw.as_ref()), }; let raw = unsafe { @@ -3442,7 +3441,7 @@ impl Device { fragment_stage, color_targets, multiview: desc.multiview, - cache: cache.as_ref().map(|it| it.raw.as_ref()).flatten(), + cache: cache.as_ref().and_then(|it| it.raw.as_ref()), }; let raw = unsafe { self.raw @@ -3523,22 +3522,31 @@ impl Device { Ok(pipeline) } + /// # Safety + /// The `data` field on `desc` must have previously been returned from [`crate::global::Global::pipeline_cache_get_data`] pub unsafe fn create_pipeline_cache( self: &Arc, desc: &pipeline::PipelineCacheDescriptor, - ) -> Option> { - let cache_desc = hal::PipelineCacheDescriptor { + ) -> Result, pipeline::CreatePipelineCacheError> { + let mut cache_desc = hal::PipelineCacheDescriptor { data: desc.data.as_deref(), label: desc.label.to_hal(self.instance_flags), }; - let raw = unsafe { (&self.raw.as_ref().unwrap()).create_pipeline_cache(&cache_desc) }?; + let raw = match unsafe { self.raw().create_pipeline_cache(&cache_desc) } { + Ok(raw) => raw, + Err(hal::PipelineCacheError::Validation) if desc.fallback => { + debug_assert!(cache_desc.data.take().is_some()); + unsafe { self.raw().create_pipeline_cache(&cache_desc)? 
} + } + Err(e) => return Err(e.into()), + }; let cache = pipeline::PipelineCache { device: self.clone(), info: ResourceInfo::new(desc.label.borrow_or_default()), // This would be none in the error condition, which we don't implement yet raw: Some(raw), }; - Some(cache) + Ok(cache) } pub(crate) fn get_texture_format_features( diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index 67d6852cb1..bb1d33c1f4 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -260,6 +260,28 @@ impl ComputePipeline { } } +#[derive(Clone, Debug, Error)] +#[non_exhaustive] +pub enum CreatePipelineCacheError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("Pipeline cache validation failed")] + Validation, + #[error("Internal error: {0}")] + Internal(String), +} + +impl From for CreatePipelineCacheError { + fn from(value: hal::PipelineCacheError) -> Self { + match value { + hal::PipelineCacheError::Device(device) => { + CreatePipelineCacheError::Device(device.into()) + } + hal::PipelineCacheError::Validation => CreatePipelineCacheError::Validation, + } + } +} + #[derive(Debug)] pub struct PipelineCache { pub(crate) raw: Option, @@ -363,6 +385,7 @@ pub struct RenderPipelineDescriptor<'a> { pub struct PipelineCacheDescriptor<'a> { pub label: Label<'a>, pub data: Option>, + pub fallback: bool, } #[derive(Clone, Debug, Error)] diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 86709b2434..0948cbcdac 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1520,8 +1520,8 @@ impl crate::Device for super::Device { unsafe fn create_pipeline_cache( &self, desc: &crate::PipelineCacheDescriptor<'_>, - ) -> Option<()> { - None + ) -> Result<(), crate::PipelineCacheError> { + Ok(()) } unsafe fn destroy_pipeline_cache(&self, (): ()) {} diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 4138e6e5ff..f1986f7705 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -224,8 +224,8 
@@ impl crate::Device for Context { unsafe fn create_pipeline_cache( &self, desc: &crate::PipelineCacheDescriptor<'_>, - ) -> Option { - Some(Resource) + ) -> Result { + Ok(Resource) } unsafe fn destroy_pipeline_cache(&self, cache: Resource) {} diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index bed69f3d96..d5f71ce646 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -1406,8 +1406,13 @@ impl crate::Device for super::Device { } } - unsafe fn create_pipeline_cache(&self, _: &crate::PipelineCacheDescriptor<'_>) -> Option<()> { - None + unsafe fn create_pipeline_cache( + &self, + _: &crate::PipelineCacheDescriptor<'_>, + ) -> Result<(), crate::PipelineCacheError> { + // Even though the cache doesn't do anything, we still return something here + // as the least bad option + Ok(()) } unsafe fn destroy_pipeline_cache(&self, (): ()) {} diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 07f1d9760e..7e2803b743 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -332,6 +332,14 @@ pub enum PipelineError { Device(#[from] DeviceError), } +#[derive(Clone, Debug, Eq, PartialEq, Error)] +pub enum PipelineCacheError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("Pipeline cache had a validation error")] + Validation, +} + #[derive(Clone, Debug, Eq, PartialEq, Error)] pub enum SurfaceError { #[error("Surface is lost")] @@ -615,7 +623,7 @@ pub trait Device: WasmNotSendSync { unsafe fn create_pipeline_cache( &self, desc: &PipelineCacheDescriptor<'_>, - ) -> Option; + ) -> Result; unsafe fn destroy_pipeline_cache(&self, cache: A::PipelineCache); unsafe fn create_query_set( diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 96f03aa66f..c3b774ca40 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1102,8 +1102,8 @@ impl crate::Device for super::Device { unsafe fn create_pipeline_cache( &self, desc: &crate::PipelineCacheDescriptor<'_>, - ) 
-> Option<()> { - None + ) -> Result<(), crate::PipelineCacheError> { + Ok(()) } unsafe fn destroy_pipeline_cache(&self, (): ()) {} diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 8594b002c0..1dc2ff2215 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1958,19 +1958,17 @@ impl crate::Device for super::Device { unsafe fn create_pipeline_cache( &self, desc: &crate::PipelineCacheDescriptor<'_>, - ) -> Option { + ) -> Result { let mut info = vk::PipelineCacheCreateInfo::builder(); // TODO: Add additional validation to the data, as described in https://medium.com/@zeuxcg/creating-a-robust-pipeline-cache-with-vulkan-961d09416cda if let Some(data) = desc.data { info = info.initial_data(data) } - // TODO: Proper error handling - let raw = { - profiling::scope!("vkCreatePipelineCache"); - unsafe { self.shared.raw.create_pipeline_cache(&info, None) }.ok()? - }; + profiling::scope!("vkCreatePipelineCache"); + let raw = unsafe { self.shared.raw.create_pipeline_cache(&info, None) } + .map_err(crate::DeviceError::from)?; - Some(PipelineCache { raw }) + Ok(PipelineCache { raw }) } unsafe fn destroy_pipeline_cache(&self, cache: PipelineCache) { unsafe { self.shared.raw.destroy_pipeline_cache(cache.raw, None) } diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index b88546ee48..7049cd3a8d 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -1748,25 +1748,6 @@ pub struct AdapterInfo { pub backend: Backend, } -impl AdapterInfo { - /// A recommended filename for storing the pipeline cache of this adapter - /// - /// Each adapter may have a different filename, to allow using multiple caches - pub fn pipeline_cache_key(&self) -> Option { - match self.backend { - Backend::Vulkan => Some(format!( - // The vendor/device should uniquely define a driver - // We will also later validate that the vendor and driver - // version match, which may lead to clearing an outdated - // cache for the same device. 
- "wgpu_pipeline_cache_vulkan_{}_{}", - self.vendor, self.device - )), - _ => None, - } - } -} - /// Describes a [`Device`](../wgpu/struct.Device.html). /// /// Corresponds to [WebGPU `GPUDeviceDescriptor`]( diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 4e26ffbaaa..f0e647da6c 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -1270,19 +1270,29 @@ impl crate::Context for ContextWgpuCore { // TODO: Will be used for error handling device_data: &Self::DeviceData, desc: &PipelineCacheInitDescriptor<'_>, - ) -> Option<(Self::PipelineCacheId, Self::PipelineCacheData)> { + ) -> (Self::PipelineCacheId, Self::PipelineCacheData) { use wgc::pipeline as pipe; let descriptor = pipe::PipelineCacheDescriptor { label: desc.label.map(Borrowed), data: Some(desc.data.into()), + fallback: desc.fallback, }; - let id = wgc::gfx_select!(device => self.0.device_create_pipeline_cache( + let (id, error) = wgc::gfx_select!(device => self.0.device_create_pipeline_cache( *device, &descriptor, None - ))?; - Some((id, ())) + )); + if let Some(cause) = error { + self.handle_error( + &device_data.error_sink, + cause, + LABEL, + desc.label, + "Device::device_create_pipeline_cache_init", + ); + } + (id, ()) } fn device_create_pipeline_cache( @@ -1291,22 +1301,33 @@ impl crate::Context for ContextWgpuCore { // TODO: Will be used for error handling device_data: &Self::DeviceData, desc: &crate::PipelineCacheDescriptor<'_>, - ) -> Option<(Self::PipelineCacheId, Self::PipelineCacheData)> { + ) -> (Self::PipelineCacheId, Self::PipelineCacheData) { use wgc::pipeline as pipe; let descriptor = pipe::PipelineCacheDescriptor { label: desc.label.map(Borrowed), data: None, + // if data is `None`, fallback won't be used + fallback: false, }; // Safety: data is None, so no safety concerns - let id = unsafe { + let (id, error) = unsafe { wgc::gfx_select!(device => self.0.device_create_pipeline_cache( *device, &descriptor, None )) - }?; - Some((id, 
())) + }; + if let Some(cause) = error { + self.handle_error( + &device_data.error_sink, + cause, + LABEL, + desc.label, + "Device::device_create_pipeline_cache_init", + ); + } + (id, ()) } fn device_create_buffer( @@ -2391,7 +2412,7 @@ impl crate::Context for ContextWgpuCore { &self, cache: &Self::PipelineCacheId, // TODO: Used for error handling? - cache_data: &Self::PipelineCacheData, + _cache_data: &Self::PipelineCacheData, ) -> Option> { wgc::gfx_select!(cache => self.0.pipeline_cache_get_data(*cache)) } diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index 85ac77ccf3..19ccfaeb96 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -241,13 +241,13 @@ pub trait Context: Debug + WasmNotSendSync + Sized { device: &Self::DeviceId, device_data: &Self::DeviceData, desc: &PipelineCacheInitDescriptor<'_>, - ) -> Option<(Self::PipelineCacheId, Self::PipelineCacheData)>; + ) -> (Self::PipelineCacheId, Self::PipelineCacheData); fn device_create_pipeline_cache( &self, device: &Self::DeviceId, device_data: &Self::DeviceData, desc: &PipelineCacheDescriptor<'_>, - ) -> Option<(Self::PipelineCacheId, Self::PipelineCacheData)>; + ) -> (Self::PipelineCacheId, Self::PipelineCacheData); fn device_create_buffer( &self, device: &Self::DeviceId, @@ -1302,13 +1302,13 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { device: &ObjectId, device_data: &crate::Data, desc: &PipelineCacheInitDescriptor<'_>, - ) -> Option<(ObjectId, Box)>; + ) -> (ObjectId, Box); fn device_create_pipeline_cache( &self, device: &ObjectId, device_data: &crate::Data, desc: &PipelineCacheDescriptor<'_>, - ) -> Option<(ObjectId, Box)>; + ) -> (ObjectId, Box); fn device_create_buffer( &self, device: &ObjectId, @@ -2347,13 +2347,12 @@ where device: &ObjectId, device_data: &crate::Data, desc: &PipelineCacheInitDescriptor<'_>, - ) -> Option<(ObjectId, Box)> { + ) -> (ObjectId, Box) { let device = ::from(*device); let device_data = downcast_ref(device_data); - let (pipeline_cache, data) = 
unsafe { - Context::device_create_pipeline_cache_init(self, &device, device_data, desc) - }?; - Some((pipeline_cache.into(), Box::new(data) as _)) + let (pipeline_cache, data) = + unsafe { Context::device_create_pipeline_cache_init(self, &device, device_data, desc) }; + (pipeline_cache.into(), Box::new(data) as _) } fn device_create_pipeline_cache( @@ -2361,12 +2360,12 @@ where device: &ObjectId, device_data: &crate::Data, desc: &PipelineCacheDescriptor<'_>, - ) -> Option<(ObjectId, Box)> { + ) -> (ObjectId, Box) { let device = ::from(*device); let device_data = downcast_ref(device_data); let (pipeline_cache, data) = - Context::device_create_pipeline_cache(self, &device, device_data, desc)?; - Some((pipeline_cache.into(), Box::new(data) as _)) + Context::device_create_pipeline_cache(self, &device, device_data, desc); + (pipeline_cache.into(), Box::new(data) as _) } fn device_create_buffer( @@ -3166,9 +3165,9 @@ where cache: &ObjectId, cache_data: &crate::Data, ) -> Option> { - let mut cache = ::from(*cache); + let cache = ::from(*cache); let cache_data = downcast_ref::(cache_data); - Context::pipeline_cache_get_data(self, &mut cache, cache_data) + Context::pipeline_cache_get_data(self, &cache, cache_data) } fn compute_pass_set_pipeline( diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 29e400f0ce..de3f524ccc 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1115,6 +1115,15 @@ impl ComputePipeline { /// creating [`RenderPipeline`]s and [`ComputePipeline`]s /// in subsequent executions /// +/// # Usage +/// +/// TODO +/// +/// # Memory Usage +/// There is not currently an API available to reduce the size of a cache. +/// +/// TODO +/// /// This type is unique to the Rust API of `wgpu`. #[derive(Debug)] pub struct PipelineCache { @@ -1128,8 +1137,12 @@ static_assertions::assert_impl_all!(PipelineCache: Send, Sync); impl PipelineCache { /// Get the data associated with this pipeline cache. 
- /// The format is unspecified, and should be passed to a call to - /// [`Device::create_pipeline_cache`] for a compatible device. + /// + /// The data format may be `wgpu` specific, and should therefore only be + /// passed to a call to [`Device::create_pipeline_cache_init`] for a + /// compatible device. + /// + /// This function is unique to the Rust API of `wgpu`. pub fn get_data(&self) -> Option> { self.context .pipeline_cache_get_data(&self.id, self.data.as_ref()) @@ -1872,6 +1885,7 @@ pub struct RenderPipelineDescriptor<'a> { /// If the pipeline will be used with a multiview render pass, this indicates how many array /// layers the attachments will have. pub multiview: Option, + /// The pipeline cache to use for this operation pub cache: Option<&'a PipelineCache>, } #[cfg(send_sync)] @@ -1966,6 +1980,7 @@ pub struct ComputePipelineDescriptor<'a> { /// The name of the entry point in the compiled shader. There must be a function with this name /// and no return value in the shader. pub entry_point: &'a str, + /// The pipeline cache to use when creating this pipeline pub cache: Option<&'a PipelineCache>, /// Advanced options for when this pipeline is compiled /// @@ -1975,16 +1990,38 @@ pub struct ComputePipelineDescriptor<'a> { #[cfg(send_sync)] static_assertions::assert_impl_all!(ComputePipelineDescriptor<'_>: Send, Sync); +/// Describes a pipeline cache which reuses data from a previous run. +/// +/// For use with [`Device::create_pipeline_cache_init`]. +/// +/// This type is unique to the Rust API of `wgpu`. #[derive(Clone, Debug)] pub struct PipelineCacheInitDescriptor<'a> { + /// Debug label of the pipeline cache. 
This might show up in some logs from `wgpu` pub label: Label<'a>, + /// The data used to initialise the cache initialise the cache using + /// + /// # Safety + /// This data must have been provided from a previous call to + /// [`PipelineCache::get_data`] pub data: &'a [u8], + /// Whether to create a cache without data when the provided data + /// is invalid. + /// + /// Recommended to set to true + pub fallback: bool, } #[cfg(send_sync)] static_assertions::assert_impl_all!(PipelineCacheInitDescriptor<'_>: Send, Sync); +/// Describes a pipeline cache when +/// +/// For use with [`Device::create_pipeline_cache`]. +/// +/// This type is unique to the Rust API of `wgpu`. #[derive(Clone, Debug)] pub struct PipelineCacheDescriptor<'a> { + /// Debug label of the pipeline cache. This might show up in some logs from `wgpu` pub label: Label<'a>, } #[cfg(send_sync)] @@ -3138,11 +3175,50 @@ impl Device { DynContext::device_make_invalid(&*self.context, &self.id, self.data.as_ref()) } + /// Test-only function to make this device invalid. + #[doc(hidden)] + pub fn make_invalid(&self) { + DynContext::device_make_invalid(&*self.context, &self.id, self.data.as_ref()) + } + + /// Create a [`PipelineCache`] with initial data + /// + /// This can be passed to [`Device::create_compute_pipeline`] + /// and [`Device::create_render_pipeline`] to either accelerate these + /// or add the cache results from those. + /// + /// # Safety + /// + /// The `data` field of `desc` must have previously been returned from a call + /// to [`PipelineCache::get_data`][^saving]. It's recommended to only `data` for the same + /// [`util::pipeline_cache_key`], but this isn't a safety requirement. + /// This is also compatible across wgpu versions, as any data format change will + /// be accounted for. + /// + /// Note that this means it is *not* supported to bring caches from previous + /// direct uses of backend APIs into this method. 
+ /// + /// # Errors + /// Returns `None` if this device does not support [`PipelineCache`]. See the + /// documentation on that type for details of API support + /// + /// Returns `Some` with an error value if: + /// * The `fallback` field on `desc` is false; and + /// * the `data` provided would not be used[^data_not_used] + /// + /// If an error value is used in subsequent calls, default caching will be used. + /// + /// [^saving]: We do recognise that saving this data to disk means this condition + /// is impossible to fully prove. Consider the risks for your own application in this case. + /// + /// [^data_not_used]: This data may be not used if: the data was produced by a prior + /// version of wgpu; or was created for an incompatible adapter, or there was a GPU driver + /// update. In some cases, the data might not be used and a real value is returned, + /// this is left to the discretion of GPU drivers. pub unsafe fn create_pipeline_cache_init( &self, desc: &PipelineCacheInitDescriptor<'_>, - // TODO: Work out error handling and conditions - ) -> Option { + ) -> PipelineCache { let (id, data) = unsafe { DynContext::device_create_pipeline_cache_init( &*self.context, @@ -3150,29 +3226,38 @@ impl Device { self.data.as_ref(), desc, ) - }?; - Some(PipelineCache { + }; + PipelineCache { context: Arc::clone(&self.context), id, data, - }) + } } - pub fn create_pipeline_cache( - &self, - desc: &PipelineCacheDescriptor<'_>, - ) -> Option { + /// Create a pipeline cache without initial data + /// + /// This can be passed to [`Device::create_compute_pipeline`] + /// and [`Device::create_render_pipeline`] to intialise its cache data + /// + /// # Errors + /// Returns `None` if this device does not support [`PipelineCache`]. 
See the + /// documentation on that type for details of API support + /// + /// Returns `Some` with an error value if: + /// * this device is invalid; or + /// * the device is out of memory + pub fn create_pipeline_cache(&self, desc: &PipelineCacheDescriptor<'_>) -> PipelineCache { let (id, data) = DynContext::device_create_pipeline_cache( &*self.context, &self.id, self.data.as_ref(), desc, - )?; - Some(PipelineCache { + ); + PipelineCache { context: Arc::clone(&self.context), id, data, - }) + } } } diff --git a/wgpu/src/util/mod.rs b/wgpu/src/util/mod.rs index 3ab6639cf8..9c2fc30b92 100644 --- a/wgpu/src/util/mod.rs +++ b/wgpu/src/util/mod.rs @@ -140,3 +140,49 @@ impl std::ops::Deref for DownloadBuffer { self.1.slice() } } + +/// A recommended key for storing [`PipelineCache`]s for the adapter +/// associated with the given [`AdapterInfo`](wgt::AdapterInfo) +/// This key will define a class of adapters for which the same cache +/// might be valid. +/// +/// If this returns `None`, the adapter doesn't support [`PipelineCache`]. +/// This may be because the API doesn't support application managed caches +/// (such as browser WebGPU), or that `wgpu` hasn't implemented it for +/// that API yet. +/// +/// This key could be used as a filename, as seen in the example below. 
+/// +/// # Examples +/// +/// ``` no_run +/// # let adapter_info = todo!(); +/// let cache_dir: PathBuf = PathBuf::new(); +/// let filename = pipeline_cache_key(&adapter_info); +/// let cache_file = cache_dir.join(filename); +/// let cache_data = std::fs::read(&cache_file); +/// let pipeline_cache: wgpu::PipelineCache = todo!("Use data (if present) to create a pipeline cache"); +/// +/// let data = pipeline_cache.get_data(); +/// if let Some(data) = data { +/// let temp_file = cache_file.with_extension("temp"); +/// std::fs::write(&temp_file, &data)?; +/// std::fs::rename(&temp_file, &cache_file)?; +/// } +/// # Ok(()) +/// ``` +/// +/// [`PipelineCache`]: super::PipelineCache +pub fn pipeline_cache_key(adapter_info: &wgt::AdapterInfo) -> Option { + match adapter_info.backend { + wgt::Backend::Vulkan => Some(format!( + // The vendor/device should uniquely define a driver + // We/the driver will also later validate that the vendor/device and driver + // version match, which may lead to clearing an outdated + // cache for the same device. 
+ "wgpu_pipeline_cache_vulkan_{}_{}", + adapter_info.vendor, adapter_info.device + )), + _ => None, + } +} From f488e81a6eabec277dfc9edade47ca6cb7f54106 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 29 Feb 2024 17:58:29 +0000 Subject: [PATCH 07/31] Fix solveable CI errors Guess at correct tracker indices thing --- wgpu-core/src/device/resource.rs | 5 ++++- wgpu-core/src/track/mod.rs | 2 ++ wgpu-hal/src/dx12/device.rs | 2 +- wgpu-hal/src/metal/device.rs | 2 +- wgpu/src/backend/webgpu.rs | 23 ++++++++++++++++++++--- wgpu/src/lib.rs | 2 +- wgpu/src/util/mod.rs | 23 +++++++++++++---------- 7 files changed, 42 insertions(+), 17 deletions(-) diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index ce6d9d97c0..e35355df80 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -3542,7 +3542,10 @@ impl Device { }; let cache = pipeline::PipelineCache { device: self.clone(), - info: ResourceInfo::new(desc.label.borrow_or_default()), + info: ResourceInfo::new( + desc.label.borrow_or_default(), + Some(self.tracker_indices.pipeline_caches.clone()), + ), // This would be none in the error condition, which we don't implement yet raw: Some(raw), }; diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs index cc20b2a01c..932f907912 100644 --- a/wgpu-core/src/track/mod.rs +++ b/wgpu-core/src/track/mod.rs @@ -228,6 +228,7 @@ pub(crate) struct TrackerIndexAllocators { pub pipeline_layouts: Arc, pub bundles: Arc, pub query_sets: Arc, + pub pipeline_caches: Arc, } impl TrackerIndexAllocators { @@ -245,6 +246,7 @@ impl TrackerIndexAllocators { pipeline_layouts: Arc::new(SharedTrackerIndexAllocator::new()), bundles: Arc::new(SharedTrackerIndexAllocator::new()), query_sets: Arc::new(SharedTrackerIndexAllocator::new()), + pipeline_caches: Arc::new(SharedTrackerIndexAllocator::new()), } } } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 
0948cbcdac..661320bb93 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1519,7 +1519,7 @@ impl crate::Device for super::Device { unsafe fn create_pipeline_cache( &self, - desc: &crate::PipelineCacheDescriptor<'_>, + _desc: &crate::PipelineCacheDescriptor<'_>, ) -> Result<(), crate::PipelineCacheError> { Ok(()) } diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index c3b774ca40..81ab5dbdb6 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1101,7 +1101,7 @@ impl crate::Device for super::Device { unsafe fn create_pipeline_cache( &self, - desc: &crate::PipelineCacheDescriptor<'_>, + _desc: &crate::PipelineCacheDescriptor<'_>, ) -> Result<(), crate::PipelineCacheError> { Ok(()) } diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index 18f1a0f0f3..5fbbc4668a 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -1997,14 +1997,23 @@ impl crate::context::Context for ContextWebGpu { create_identified(device_data.0.create_compute_pipeline(&mapped_desc)) } + fn device_create_pipeline_cache( + &self, + _: &Self::DeviceId, + _: &Self::DeviceData, + _: &crate::PipelineCacheDescriptor<'_>, + ) -> (Self::PipelineCacheId, Self::PipelineCacheData) { + (Unused, ()) + } unsafe fn device_create_pipeline_cache_init( &self, _: &Self::DeviceId, _: &Self::DeviceData, - _: &PipelineCacheInitDescriptor<'_>, - ) -> Option<(Self::PipelineCacheId, Self::PipelineCacheData)> { - None + _: &crate::PipelineCacheInitDescriptor<'_>, + ) -> (Self::PipelineCacheId, Self::PipelineCacheData) { + (Unused, ()) } + fn pipeline_cache_drop(&self, _: &Self::PipelineCacheId, _: &Self::PipelineCacheData) {} fn device_create_buffer( &self, @@ -2992,6 +3001,14 @@ impl crate::context::Context for ContextWebGpu { fn device_start_capture(&self, _device: &Self::DeviceId, _device_data: &Self::DeviceData) {} fn device_stop_capture(&self, _device: &Self::DeviceId, _device_data: &Self::DeviceData) 
{} + fn pipeline_cache_get_data( + &self, + _: &Self::PipelineCacheId, + _: &Self::PipelineCacheData, + ) -> Option> { + None + } + fn compute_pass_set_pipeline( &self, _pass: &mut Self::ComputePassId, diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index de3f524ccc..fd7225230d 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -3237,7 +3237,7 @@ impl Device { /// Create a pipeline cache without initial data /// /// This can be passed to [`Device::create_compute_pipeline`] - /// and [`Device::create_render_pipeline`] to intialise its cache data + /// and [`Device::create_render_pipeline`] to initialise its cache data /// /// # Errors /// Returns `None` if this device does not support [`PipelineCache`]. See the diff --git a/wgpu/src/util/mod.rs b/wgpu/src/util/mod.rs index 9c2fc30b92..ce5af6fb6c 100644 --- a/wgpu/src/util/mod.rs +++ b/wgpu/src/util/mod.rs @@ -156,20 +156,23 @@ impl std::ops::Deref for DownloadBuffer { /// # Examples /// /// ``` no_run +/// # use std::path::PathBuf; /// # let adapter_info = todo!(); /// let cache_dir: PathBuf = PathBuf::new(); -/// let filename = pipeline_cache_key(&adapter_info); -/// let cache_file = cache_dir.join(filename); -/// let cache_data = std::fs::read(&cache_file); -/// let pipeline_cache: wgpu::PipelineCache = todo!("Use data (if present) to create a pipeline cache"); +/// let filename = wgpu::util::pipeline_cache_key(&adapter_info); +/// if let Some(filename) = filename { +/// let cache_file = cache_dir.join(&filename); +/// let cache_data = std::fs::read(&cache_file); +/// let pipeline_cache: wgpu::PipelineCache = todo!("Use data (if present) to create a pipeline cache"); /// -/// let data = pipeline_cache.get_data(); -/// if let Some(data) = data { -/// let temp_file = cache_file.with_extension("temp"); -/// std::fs::write(&temp_file, &data)?; -/// std::fs::rename(&temp_file, &cache_file)?; +/// let data = pipeline_cache.get_data(); +/// if let Some(data) = data { +/// let temp_file = 
cache_file.with_extension("temp"); +/// std::fs::write(&temp_file, &data)?; +/// std::fs::rename(&temp_file, &cache_file)?; +/// } /// } -/// # Ok(()) +/// # Ok::<(), std::io::Error>(()) /// ``` /// /// [`PipelineCache`]: super::PipelineCache From eb173311524e2b4828a1755de6078ae8bc579efe Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 1 Mar 2024 16:53:38 +0000 Subject: [PATCH 08/31] Add a CHANGELOG entry --- CHANGELOG.md | 114 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 82 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88f8aade9f..ea156a294c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ Bottom level categories: Wgpu now supports querying [shader compilation info](https://www.w3.org/TR/webgpu/#dom-gpushadermodule-getcompilationinfo). This allows you to get more structured information about compilation errors, warnings and info: + ```rust ... let lighting_shader = ctx.device.create_shader_module(include_wgsl!("lighting.wgsl")); @@ -62,9 +63,11 @@ for message in compilation_info By @stefnotch in [#5410](https://github.com/gfx-rs/wgpu/pull/5410) +### New features +#### Vulkan -### New features +- Added a `PipelineCache` resource to allow using Vulkan pipeline caches. 
By @DJMcNab in [#5319](https://github.com/gfx-rs/wgpu/pull/5319) #### General @@ -74,7 +77,7 @@ By @stefnotch in [#5410](https://github.com/gfx-rs/wgpu/pull/5410) #### GLES / OpenGL -- Fix regression on OpenGL (EGL) where non-sRGB still used sRGB [#5642](https://github.com/gfx-rs/wgpu/pull/5642) +- Fix regression on OpenGL (EGL) where non-sRGB still used sRGB [#5642](https://github.com/gfx-rs/wgpu/pull/5642) ## v0.20.0 (2024-04-28) @@ -85,10 +88,13 @@ By @stefnotch in [#5410](https://github.com/gfx-rs/wgpu/pull/5410) Wgpu supports now [pipeline-overridable constants](https://www.w3.org/TR/webgpu/#dom-gpuprogrammablestage-constants) This allows you to define constants in wgsl like this: + ```rust override some_factor: f32 = 42.1337; // Specifies a default of 42.1337 if it's not set. ``` + And then set them at runtime like so on your pipeline consuming this shader: + ```rust // ... fragment: Some(wgpu::FragmentState { @@ -110,7 +116,6 @@ Due to a specification change `write_timestamp` is no longer supported on WebGPU By @wumpf in [#5188](https://github.com/gfx-rs/wgpu/pull/5188) - #### Wgsl const evaluation for many more built-ins Many numeric built-ins have had a constant evaluation implementation added for them, which allows them to be used in a `const` context: @@ -127,17 +132,17 @@ The following subgroup operations are available in wgsl now: `subgroupBallot`, `subgroupAll`, `subgroupAny`, `subgroupAdd`, `subgroupMul`, `subgroupMin`, `subgroupMax`, `subgroupAnd`, `subgroupOr`, `subgroupXor`, `subgroupExclusiveAdd`, `subgroupExclusiveMul`, `subgroupInclusiveAdd`, `subgroupInclusiveMul`, `subgroupBroadcastFirst`, `subgroupBroadcast`, `subgroupShuffle`, `subgroupShuffleDown`, `subgroupShuffleUp`, `subgroupShuffleXor` - Availability is governed by the following feature flags: -* `wgpu::Features::SUBGROUP` for all operations except `subgroupBarrier` in fragment & compute, supported on Vulkan, DX12 and Metal. 
-* `wgpu::Features::SUBGROUP_VERTEX`, for all operations except `subgroupBarrier` general operations in vertex shaders, supported on Vulkan -* `wgpu::Features::SUBGROUP_BARRIER`, for support of the `subgroupBarrier` operation, supported on Vulkan & Metal + +- `wgpu::Features::SUBGROUP` for all operations except `subgroupBarrier` in fragment & compute, supported on Vulkan, DX12 and Metal. +- `wgpu::Features::SUBGROUP_VERTEX`, for all operations except `subgroupBarrier` general operations in vertex shaders, supported on Vulkan +- `wgpu::Features::SUBGROUP_BARRIER`, for support of the `subgroupBarrier` operation, supported on Vulkan & Metal Note that there currently [some differences](https://github.com/gfx-rs/wgpu/issues/5555) between wgpu's native-only implementation and the [open WebGPU proposal](https://github.com/gpuweb/gpuweb/blob/main/proposals/subgroups.md). By @exrook and @lichtso in [#5301](https://github.com/gfx-rs/wgpu/pull/5301) -##### Signed and unsigned 64 bit integer support in shaders. +##### Signed and unsigned 64 bit integer support in shaders `wgpu::Features::SHADER_INT64` enables 64 bit integer signed and unsigned integer variables in wgsl (`i64` and `u64` respectively). Supported on Vulkan, DX12 (requires DXC) and Metal (with MSL 2.3+ support). @@ -156,10 +161,12 @@ By @atlv24 and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154 - This has been added to the set of flags set by `InstanceFlags::advanced_debugging`. Since the overhead is potentially very large, the flag is not enabled by default in debug builds when using `InstanceFlags::from_build_config`. - As with other instance flags, this flag can be changed in calls to `InstanceFlags::with_env` with the new `WGPU_GPU_BASED_VALIDATION` environment variable. - `wgpu::Instance` can now report which `wgpu::Backends` are available based on the build configuration. 
By @wumpf [#5167](https://github.com/gfx-rs/wgpu/pull/5167) + ```diff -wgpu::Instance::any_backend_feature_enabled() +!wgpu::Instance::enabled_backend_features().is_empty() ``` + - Breaking change: [`wgpu_core::pipeline::ProgrammableStageDescriptor`](https://docs.rs/wgpu-core/latest/wgpu_core/pipeline/struct.ProgrammableStageDescriptor.html#structfield.entry_point) is now optional. By @ErichDonGubler in [#5305](https://github.com/gfx-rs/wgpu/pull/5305). - `Features::downlevel{_webgl2,}_features` was made const by @MultisampledNight in [#5343](https://github.com/gfx-rs/wgpu/pull/5343) - Breaking change: [`wgpu_core::pipeline::ShaderError`](https://docs.rs/wgpu-core/latest/wgpu_core/pipeline/struct.ShaderError.html) has been moved to `naga`. By @stefnotch in [#5410](https://github.com/gfx-rs/wgpu/pull/5410) @@ -216,6 +223,7 @@ By @atlv24 and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154 ### Bug Fixes #### General + - Fix `serde` feature not compiling for `wgpu-types`. By @KirmesBude in [#5149](https://github.com/gfx-rs/wgpu/pull/5149) - Fix the validation of vertex and index ranges. By @nical in [#5144](https://github.com/gfx-rs/wgpu/pull/5144) and [#5156](https://github.com/gfx-rs/wgpu/pull/5156) - Fix panic when creating a surface while no backend is available. By @wumpf [#5166](https://github.com/gfx-rs/wgpu/pull/5166) @@ -269,6 +277,7 @@ By @atlv24 and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154 - Fix deadlock in certain situations when mapping buffers using `wgpu-profiler`. By @cwfitzgerald in [#5517](https://github.com/gfx-rs/wgpu/pull/5517) #### WebGPU + - Correctly pass through timestamp queries to WebGPU. By @cwfitzgerald in [#5527](https://github.com/gfx-rs/wgpu/pull/5527). ## v0.19.3 (2024-03-01) @@ -295,8 +304,8 @@ By @cwfitzgerald in [#5325](https://github.com/gfx-rs/wgpu/pull/5325). - Fix incorrect validation causing all indexed draws on render bundles to fail. 
By @wumpf in [#5430](https://github.com/gfx-rs/wgpu/pull/5340). #### Android -- Fix linking error when targeting android without `winit`. By @ashdnazg in [#5326](https://github.com/gfx-rs/wgpu/pull/5326). +- Fix linking error when targeting android without `winit`. By @ashdnazg in [#5326](https://github.com/gfx-rs/wgpu/pull/5326). ## v0.19.2 (2024-02-29) @@ -305,15 +314,19 @@ This release includes `wgpu`, `wgpu-core`, `wgpu-hal`, `wgpu-types`, and `naga`. ### Added/New Features #### General + - `wgpu::Id` now implements `PartialOrd`/`Ord` allowing it to be put in `BTreeMap`s. By @cwfitzgerald and @9291Sam in [#5176](https://github.com/gfx-rs/wgpu/pull/5176) #### OpenGL + - Log an error when OpenGL texture format heuristics fail. By @PolyMeilex in [#5266](https://github.com/gfx-rs/wgpu/issues/5266) #### `wgsl-out` + - Learned to generate acceleration structure types. By @JMS55 in [#5261](https://github.com/gfx-rs/wgpu/pull/5261) ### Documentation + - Fix link in `wgpu::Instance::create_surface` documentation. By @HexoKnight in [#5280](https://github.com/gfx-rs/wgpu/pull/5280). - Fix typo in `wgpu::CommandEncoder::clear_buffer` documentation. By @PWhiddy in [#5281](https://github.com/gfx-rs/wgpu/pull/5281). - `Surface` configuration incorrectly claimed that `wgpu::Instance::create_surface` was unsafe. By @hackaugusto in [#5265](https://github.com/gfx-rs/wgpu/pull/5265). @@ -321,6 +334,7 @@ This release includes `wgpu`, `wgpu-core`, `wgpu-hal`, `wgpu-types`, and `naga`. ### Bug Fixes #### General + - Device lost callbacks are invoked when replaced and when global is dropped. By @bradwerth in [#5168](https://github.com/gfx-rs/wgpu/pull/5168) - Fix performance regression when allocating a large amount of resources of the same type. By @nical in [#5229](https://github.com/gfx-rs/wgpu/pull/5229) - Fix docs.rs wasm32 builds. 
By @cwfitzgerald in [#5310](https://github.com/gfx-rs/wgpu/pull/5310) @@ -329,9 +343,11 @@ This release includes `wgpu`, `wgpu-core`, `wgpu-hal`, `wgpu-types`, and `naga`. - Fix missing validation for `Device::clear_buffer` where `offset + size > buffer.size` was not checked when `size` was omitted. By @ErichDonGubler in [#5282](https://github.com/gfx-rs/wgpu/pull/5282). #### DX12 + - Fix `panic!` when dropping `Instance` without `InstanceFlags::VALIDATION`. By @hakolao in [#5134](https://github.com/gfx-rs/wgpu/pull/5134) #### OpenGL + - Fix internal format for the `Etc2Rgba8Unorm` format. By @andristarr in [#5178](https://github.com/gfx-rs/wgpu/pull/5178) - Try to load `libX11.so.6` in addition to `libX11.so` on linux. [#5307](https://github.com/gfx-rs/wgpu/pull/5307) - Make use of `GL_EXT_texture_shadow_lod` to support sampling a cube depth texture with an explicit LOD. By @cmrschwarz in #[5171](https://github.com/gfx-rs/wgpu/pull/5171). @@ -340,7 +356,6 @@ This release includes `wgpu`, `wgpu-core`, `wgpu-hal`, `wgpu-types`, and `naga`. - Fix code generation from nested loops. By @cwfitzgerald and @teoxoy in [#5311](https://github.com/gfx-rs/wgpu/pull/5311) - ## v0.19.1 (2024-01-22) This release includes `wgpu` and `wgpu-hal`. The rest of the crates are unchanged since 0.19.0. @@ -365,10 +380,10 @@ This release includes `wgpu` and `wgpu-hal`. The rest of the crates are unchange - Document Wayland specific behavior related to `SurfaceTexture::present`. By @i509VCB in [#5093](https://github.com/gfx-rs/wgpu/pull/5093). - ## v0.19.0 (2024-01-17) This release includes: + - `wgpu` - `wgpu-core` - `wgpu-hal` @@ -390,6 +405,7 @@ By @gents83 in [#3626](https://github.com/gfx-rs/wgpu/pull/3626) and thanks also All of wgpu's public dependencies are now re-exported at the top level so that users don't need to take their own dependencies. This includes: + - wgpu-core - wgpu-hal - naga @@ -436,6 +452,7 @@ By @i509VCB in [#4754](https://github.com/gfx-rs/wgpu/pull/4754). 
### `DeviceExt::create_texture_with_data` allows Mip-Major Data Previously, `DeviceExt::create_texture_with_data` only allowed data to be provided in layer major order. There is now a `order` parameter which allows you to specify if the data is in layer major or mip major order. + ```diff let tex = ctx.device.create_texture_with_data( &queue, @@ -455,6 +472,7 @@ Passing an owned value `window` to `Surface` will return a `wgpu::Surface<'stati All possible safe variants (owned windows and web canvases) are grouped using `wgpu::SurfaceTarget`. Conversion to `wgpu::SurfaceTarget` is automatic for any type implementing `raw-window-handle`'s `HasWindowHandle` & `HasDisplayHandle` traits, i.e. most window types. For web canvas types this has to be done explicitly: + ```rust let surface: wgpu::Surface<'static> = instance.create_surface(wgpu::SurfaceTarget::Canvas(my_canvas))?; ``` @@ -464,12 +482,15 @@ All unsafe variants are now grouped under `wgpu::Instance::create_surface_unsafe In order to create a `wgpu::Surface<'static>` without passing ownership of the window use `wgpu::SurfaceTargetUnsafe::from_window`: + ```rust let surface = unsafe { instance.create_surface_unsafe(wgpu::SurfaceTargetUnsafe::from_window(&my_window))? }; ``` + The easiest way to make this code safe is to use shared ownership: + ```rust let window: Arc; // ... @@ -489,21 +510,27 @@ automatically converting literals and other constant expressions from abstract numeric types to concrete types when safe and necessary. For example, to build a vector of floating-point numbers, Naga previously made you write: + ```rust vec3(1.0, 2.0, 3.0) ``` + With this change, you can now simply write: + ```rust vec3(1, 2, 3) ``` + Even though the literals are abstract integers, Naga recognizes that it is safe and necessary to convert them to `f32` values in order to build the vector. 
You can also use abstract values as initializers for global constants and global and local variables, like this: + ```rust var unit_x: vec2 = vec2(1, 0); ``` + The literals `1` and `0` are abstract integers, and the expression `vec2(1, 0)` is an abstract vector. However, Naga recognizes that it can convert that to the concrete type `vec2` to satisfy @@ -546,6 +573,7 @@ By @cwfitzgerald in [#5053](https://github.com/gfx-rs/wgpu/pull/5053) ### New Features #### General + - Added `DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW` to know if `@builtin(vertex_index)` and `@builtin(instance_index)` will respect the `first_vertex` / `first_instance` in indirect calls. If this is not present, both will always start counting from 0. Currently enabled on all backends except DX12. By @cwfitzgerald in [#4722](https://github.com/gfx-rs/wgpu/pull/4722). - Added support for the `FLOAT32_FILTERABLE` feature (web and native, corresponds to WebGPU's `float32-filterable`). By @almarklein in [#4759](https://github.com/gfx-rs/wgpu/pull/4759). - GPU buffer memory is released during "lose the device". By @bradwerth in [#4851](https://github.com/gfx-rs/wgpu/pull/4851). @@ -560,6 +588,7 @@ By @cwfitzgerald in [#5053](https://github.com/gfx-rs/wgpu/pull/5053) - `SurfaceConfiguration` now exposes `desired_maximum_frame_latency` which was previously hard-coded to 2. By setting it to 1 you can reduce latency under the risk of making GPU & CPU work sequential. Currently, on DX12 this affects the `MaximumFrameLatency`, on all other backends except OpenGL the size of the swapchain (on OpenGL this has no effect). By @emilk & @wumpf in [#4899](https://github.com/gfx-rs/wgpu/pull/4899) #### OpenGL + - `@builtin(instance_index)` now properly reflects the range provided in the draw call instead of always counting from 0. By @cwfitzgerald in [#4722](https://github.com/gfx-rs/wgpu/pull/4722). 
- Desktop GL now supports `POLYGON_MODE_LINE` and `POLYGON_MODE_POINT`. By @valaphee in [#4836](https://github.com/gfx-rs/wgpu/pull/4836). @@ -631,6 +660,7 @@ This release includes `naga` version 0.14.2. The crates `wgpu-core`, `wgpu-hal` ### Bug Fixes #### Naga + - When evaluating const-expressions and generating SPIR-V, properly handle `Compose` expressions whose operands are `Splat` expressions. Such expressions are created and marked as constant by the constant evaluator. By @jimblandy in [#4695](https://github.com/gfx-rs/wgpu/pull/4695). ## v0.18.1 (2023-11-15) @@ -640,15 +670,18 @@ This release includes `naga` version 0.14.2. The crates `wgpu-core`, `wgpu-hal` ### Bug Fixes #### General + - Fix panic in `Surface::configure` in debug builds. By @cwfitzgerald in [#4635](https://github.com/gfx-rs/wgpu/pull/4635) - Fix crash when all the following are true: By @teoxoy in #[#4642](https://github.com/gfx-rs/wgpu/pull/4642) - Passing a naga module directly to `Device::create_shader_module`. - `InstanceFlags::DEBUG` is enabled. #### DX12 + - Always use HLSL 2018 when using DXC to compile HLSL shaders. By @daxpedda in [#4629](https://github.com/gfx-rs/wgpu/pull/4629) #### Metal + - In Metal Shading Language output, fix issue where local variables were sometimes using variable names from previous functions. By @DJMcNab in [#4594](https://github.com/gfx-rs/wgpu/pull/4594) ## v0.18.0 (2023-10-25) @@ -794,7 +827,7 @@ let instance = wgpu::Instance::new(InstanceDescriptor { `gles_minor_version`: By @PJB3005 in [#3998](https://github.com/gfx-rs/wgpu/pull/3998) `flags`: By @nical in [#4230](https://github.com/gfx-rs/wgpu/pull/4230) -### Many New Examples! +### Many New Examples - Added the following examples: By @JustAnotherCodemonkey in [#3885](https://github.com/gfx-rs/wgpu/pull/3885). 
- [repeated-compute](https://github.com/gfx-rs/wgpu/tree/trunk/examples/repeated-compute) @@ -844,7 +877,6 @@ By @teoxoy in [#4185](https://github.com/gfx-rs/wgpu/pull/4185) - Allow filtering labels out before they are passed to GPU drivers by @nical in [https://github.com/gfx-rs/wgpu/pull/4246](4246) - `DeviceLostClosure` callback mechanism provided so user agents can resolve `GPUDevice.lost` Promises at the appropriate time by @bradwerth in [#4645](https://github.com/gfx-rs/wgpu/pull/4645) - #### Vulkan - Rename `wgpu_hal::vulkan::Instance::required_extensions` to `desired_extensions`. By @jimblandy in [#4115](https://github.com/gfx-rs/wgpu/pull/4115) @@ -917,7 +949,7 @@ By @teoxoy in [#4185](https://github.com/gfx-rs/wgpu/pull/4185) ### Added/New Features -- Add `get_mapped_range_as_array_buffer` for faster buffer read-backs in wasm builds. By @ryankaplan in [#4042] (https://github.com/gfx-rs/wgpu/pull/4042). +- Add `get_mapped_range_as_array_buffer` for faster buffer read-backs in wasm builds. By @ryankaplan in [#4042] (). ### Bug Fixes @@ -973,7 +1005,6 @@ By @fornwall in [#3904](https://github.com/gfx-rs/wgpu/pull/3904) and [#3905](ht - Added support for importing external buffers using `buffer_from_raw` (Dx12, Metal, Vulkan) and `create_buffer_from_hal`. By @AdrianEddy in [#3355](https://github.com/gfx-rs/wgpu/pull/3355) - #### Vulkan - Work around [Vulkan-ValidationLayers#5671](https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/5671) by ignoring reports of violations of [VUID-vkCmdEndDebugUtilsLabelEXT-commandBuffer-01912](https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdEndDebugUtilsLabelEXT-commandBuffer-01912). By @jimblandy in [#3809](https://github.com/gfx-rs/wgpu/pull/3809). @@ -984,7 +1015,7 @@ By @fornwall in [#3904](https://github.com/gfx-rs/wgpu/pull/3904) and [#3905](ht - Empty scissor rects are allowed now, matching the specification. by @PJB3005 in [#3863](https://github.com/gfx-rs/wgpu/pull/3863). 
- Add back components info to `TextureFormat`s. By @teoxoy in [#3843](https://github.com/gfx-rs/wgpu/pull/3843). -- Add `get_mapped_range_as_array_buffer` for faster buffer read-backs in wasm builds. By @ryankaplan in [#4042] (https://github.com/gfx-rs/wgpu/pull/4042). +- Add `get_mapped_range_as_array_buffer` for faster buffer read-backs in wasm builds. By @ryankaplan in [#4042] (). ### Documentation @@ -1047,7 +1078,7 @@ By @fornwall in [#3904](https://github.com/gfx-rs/wgpu/pull/3904) and [#3905](ht #### DX12 -- Increase the `max_storage_buffers_per_shader_stage` and `max_storage_textures_per_shader_stage` limits based on what the hardware supports. by @Elabajaba in [#3798]https://github.com/gfx-rs/wgpu/pull/3798 +- Increase the `max_storage_buffers_per_shader_stage` and `max_storage_textures_per_shader_stage` limits based on what the hardware supports. by @Elabajaba in [#3798] ## v0.16.1 (2023-05-24) @@ -1062,7 +1093,7 @@ By @fornwall in [#3904](https://github.com/gfx-rs/wgpu/pull/3904) and [#3905](ht #### WebGPU -* Fix crash when calling `create_surface_from_canvas`. By @grovesNL in [#3718](https://github.com/gfx-rs/wgpu/pull/3718) +- Fix crash when calling `create_surface_from_canvas`. By @grovesNL in [#3718](https://github.com/gfx-rs/wgpu/pull/3718) ## v0.16.0 (2023-04-19) @@ -1081,7 +1112,6 @@ By @fornwall in [#3904](https://github.com/gfx-rs/wgpu/pull/3904) and [#3905](ht The `TextureFormat::describe` function was removed in favor of separate functions: `block_dimensions`, `is_compressed`, `is_srgb`, `required_features`, `guaranteed_format_features`, `sample_type` and `block_size`. - ```diff - let block_dimensions = format.describe().block_dimensions; + let block_dimensions = format.block_dimensions(); @@ -1174,6 +1204,7 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). 
### Added/New Features #### General + - Added feature flags for ray-tracing (currently only hal): `RAY_QUERY` and `RAY_TRACING` @daniel-keitel (started by @expenses) in [#3507](https://github.com/gfx-rs/wgpu/pull/3507) #### Vulkan @@ -1184,7 +1215,6 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). - Added basic ray-tracing api for acceleration structures, and ray-queries @daniel-keitel (started by @expenses) in [#3507](https://github.com/gfx-rs/wgpu/pull/3507) - ### Changes #### General @@ -1211,12 +1241,14 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). - Add support for `Features::RG11B10UFLOAT_RENDERABLE`. By @mockersf in [#3689](https://github.com/gfx-rs/wgpu/pull/3689) #### Vulkan + - Set `max_memory_allocation_size` via `PhysicalDeviceMaintenance3Properties`. By @jinleili in [#3567](https://github.com/gfx-rs/wgpu/pull/3567) - Silence false-positive validation error about surface resizing. By @seabassjh in [#3627](https://github.com/gfx-rs/wgpu/pull/3627) ### Bug Fixes #### General + - `copyTextureToTexture` src/dst aspects must both refer to all aspects of src/dst format. By @teoxoy in [#3431](https://github.com/gfx-rs/wgpu/pull/3431) - Validate before extracting texture selectors. By @teoxoy in [#3487](https://github.com/gfx-rs/wgpu/pull/3487) - Fix fatal errors (those which panic even if an error handler is set) not including all of the details. By @kpreid in [#3563](https://github.com/gfx-rs/wgpu/pull/3563) @@ -1224,27 +1256,33 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). - Fix surfaces not being dropped until exit. By @benjaminschaaf in [#3647](https://github.com/gfx-rs/wgpu/pull/3647) #### WebGPU + - Fix handling of `None` values for `depth_ops` and `stencil_ops` in `RenderPassDescriptor::depth_stencil_attachment`. By @niklaskorz in [#3660](https://github.com/gfx-rs/wgpu/pull/3660) - Avoid using `WasmAbi` functions for WebGPU backend. 
By @grovesNL in [#3657](https://github.com/gfx-rs/wgpu/pull/3657) #### DX12 + - Use typeless formats for textures that might be viewed as srgb or non-srgb. By @teoxoy in [#3555](https://github.com/gfx-rs/wgpu/pull/3555) #### GLES + - Set FORCE_POINT_SIZE if it is vertex shader with mesh consist of point list. By @REASY in [3440](https://github.com/gfx-rs/wgpu/pull/3440) - Remove unwraps inside `surface.configure`. By @cwfitzgerald in [#3585](https://github.com/gfx-rs/wgpu/pull/3585) - Fix `copy_external_image_to_texture`, `copy_texture_to_texture` and `copy_buffer_to_texture` not taking the specified index into account if the target texture is a cube map, 2D texture array or cube map array. By @daxpedda [#3641](https://github.com/gfx-rs/wgpu/pull/3641) - Fix disabling of vertex attributes with non-consecutive locations. By @Azorlogh in [#3706](https://github.com/gfx-rs/wgpu/pull/3706) #### Metal + - Fix metal erroring on an `array_stride` of 0. By @teoxoy in [#3538](https://github.com/gfx-rs/wgpu/pull/3538) - `create_texture` returns an error if `new_texture` returns NULL. By @jinleili in [#3554](https://github.com/gfx-rs/wgpu/pull/3554) - Fix shader bounds checking being ignored. By @FL33TW00D in [#3603](https://github.com/gfx-rs/wgpu/pull/3603) #### Vulkan + - Treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android due to rotation issues. By @James2022-rgb in [#3525](https://github.com/gfx-rs/wgpu/pull/3525) ### Examples + - Use `BufferUsages::QUERY_RESOLVE` instead of `BufferUsages::COPY_DST` for buffers used in `CommandEncoder::resolve_query_set` calls in `mipmap` example. By @JolifantoBambla in [#3489](https://github.com/gfx-rs/wgpu/pull/3489) ## v0.15.3 (2023-03-22) @@ -1252,22 +1290,25 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). ### Bug Fixes #### Metal + - Fix incorrect mipmap being sampled when using `MinLod <= 0.0` and `MaxLod >= 32.0` or when the fragment shader samples different Lods in the same quad. 
By @cwfitzgerald in [#3610](https://github.com/gfx-rs/wgpu/pull/3610). #### GLES + - Fix `Vertex buffer is not big enough for the draw call.` for ANGLE/Web when rendering with instance attributes on a single instance. By @wumpf in [#3596](https://github.com/gfx-rs/wgpu/pull/3596) - Reset all queue state between command buffers in a submit. By @jleibs [#3589](https://github.com/gfx-rs/wgpu/pull/3589) - Reset the state of `SAMPLE_ALPHA_TO_COVERAGE` on queue reset. By @jleibs [#3589](https://github.com/gfx-rs/wgpu/pull/3589) - ## wgpu-0.15.2 (2023-03-08) ### Bug Fixes #### Metal + - Fix definition of `NSOperatingSystemVersion` to avoid potential crashes. By @grovesNL in [#3557](https://github.com/gfx-rs/wgpu/pull/3557) #### GLES + - Enable `WEBGL_debug_renderer_info` before querying unmasked vendor/renderer to avoid crashing on emscripten in [#3519](https://github.com/gfx-rs/wgpu/pull/3519) ## wgpu-0.15.1 (2023-02-09) @@ -1275,29 +1316,37 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). ### Changes #### General + - Fix for some minor issues in comments on some features. By @Wumpf in [#3455](https://github.com/gfx-rs/wgpu/pull/3455) #### Vulkan + - Improve format MSAA capabilities detection. By @jinleili in [#3429](https://github.com/gfx-rs/wgpu/pull/3429) #### DX12 + - Update gpu allocator to 0.22. By @Elabajaba in [#3447](https://github.com/gfx-rs/wgpu/pull/3447) #### WebGPU + - Implement `CommandEncoder::clear_buffer`. By @raphlinus in [#3426](https://github.com/gfx-rs/wgpu/pull/3426) ### Bug Fixes #### General + - Re-sort supported surface formats based on srgb-ness. By @cwfitzgerald in [#3444](https://github.com/gfx-rs/wgpu/pull/3444) #### Vulkan + - Fix surface view formats validation error. By @jinleili in [#3432](https://github.com/gfx-rs/wgpu/pull/3432) #### DX12 + - Fix DXC validation issues when using a custom `dxil_path`. 
By @Elabajaba in [#3434](https://github.com/gfx-rs/wgpu/pull/3434) #### GLES + - Unbind vertex buffers at end of renderpass. By @cwfitzgerald in [#3459](https://github.com/gfx-rs/wgpu/pull/3459) #### WebGPU @@ -1307,14 +1356,13 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). ### Documentation #### General -- Build for Wasm on docs.rs. By @daxpedda in [#3462](https://github.com/gfx-rs/wgpu/pull/3428) +- Build for Wasm on docs.rs. By @daxpedda in [#3462](https://github.com/gfx-rs/wgpu/pull/3428) ## wgpu-0.15.0 (2023-01-25) ### Major Changes - #### WGSL Top-Level `let` is now `const` All top level constants are now declared with `const`, catching up with the wgsl spec. @@ -1326,7 +1374,7 @@ All top level constants are now declared with `const`, catching up with the wgsl +const SOME_CONSTANT = 12.0; ``` -See https://github.com/gfx-rs/naga/blob/master/CHANGELOG.md#v011-2023-01-25 for smaller shader improvements. +See for smaller shader improvements. #### Surface Capabilities API @@ -1412,7 +1460,7 @@ By @39ali in [3140](https://github.com/gfx-rs/wgpu/pull/3140) You can now choose to use the DXC compiler for DX12 instead of FXC. The DXC compiler is faster, less buggy, and allows for new features compared to the old, unmaintained FXC compiler. -You can choose which compiler to use at `Instance` creation using the `dx12_shader_compiler` field in the `InstanceDescriptor` struct. Note that DXC requires both `dxcompiler.dll` and `dxil.dll`, which can be downloaded from https://github.com/microsoft/DirectXShaderCompiler/releases. Both .dlls need to be shipped with your application when targeting DX12 and using the `DXC` compiler. If the .dlls can't be loaded, then it will fall back to the FXC compiler. By @39ali and @Elabajaba in [#3356](https://github.com/gfx-rs/wgpu/pull/3356) +You can choose which compiler to use at `Instance` creation using the `dx12_shader_compiler` field in the `InstanceDescriptor` struct. 
Note that DXC requires both `dxcompiler.dll` and `dxil.dll`, which can be downloaded from . Both .dlls need to be shipped with your application when targeting DX12 and using the `DXC` compiler. If the .dlls can't be loaded, then it will fall back to the FXC compiler. By @39ali and @Elabajaba in [#3356](https://github.com/gfx-rs/wgpu/pull/3356) #### Suballocate DX12 buffers and textures @@ -1494,7 +1542,6 @@ By @jimblandy in [#3254](https://github.com/gfx-rs/wgpu/pull/3254). - Implement `queue_validate_write_buffer` by @jinleili in [#3098](https://github.com/gfx-rs/wgpu/pull/3098) - Sync depth/stencil copy restrictions with the spec by @teoxoy in [#3314](https://github.com/gfx-rs/wgpu/pull/3314) - ### Added/New Features #### General @@ -1527,6 +1574,7 @@ By @jimblandy in [#3254](https://github.com/gfx-rs/wgpu/pull/3254). - Sync `TextureFormat.describe` with the spec. By @teoxoy in [3312](https://github.com/gfx-rs/wgpu/pull/3312) #### Metal + - Add a way to create `Device` and `Queue` from raw Metal resources in wgpu-hal. By @AdrianEddy in [#3338](https://github.com/gfx-rs/wgpu/pull/3338) ### Bug Fixes @@ -1536,12 +1584,12 @@ By @jimblandy in [#3254](https://github.com/gfx-rs/wgpu/pull/3254). - Update ndk-sys to v0.4.1+23.1.7779620, to fix checksum failures. By @jimblandy in [#3232](https://github.com/gfx-rs/wgpu/pull/3232). - Bother to free the `hal::Api::CommandBuffer` when a `wgpu_core::command::CommandEncoder` is dropped. By @jimblandy in [#3069](https://github.com/gfx-rs/wgpu/pull/3069). - Fixed the mipmap example by adding the missing WRITE_TIMESTAMP_INSIDE_PASSES feature. By @Olaroll in [#3081](https://github.com/gfx-rs/wgpu/pull/3081). 
-- Avoid panicking in some interactions with invalid resources by @nical in (#3094)[https://github.com/gfx-rs/wgpu/pull/3094] +- Avoid panicking in some interactions with invalid resources by @nical in [#3094](https://github.com/gfx-rs/wgpu/pull/3094) - Fixed an integer overflow in `copy_texture_to_texture` by @nical [#3090](https://github.com/gfx-rs/wgpu/pull/3090) -- Remove `wgpu_types::Features::DEPTH24PLUS_STENCIL8`, making `wgpu::TextureFormat::Depth24PlusStencil8` available on all backends. By @Healthire in (#3151)[https://github.com/gfx-rs/wgpu/pull/3151] -- Fix an integer overflow in `queue_write_texture` by @nical in (#3146)[https://github.com/gfx-rs/wgpu/pull/3146] -- Make `RenderPassCompatibilityError` and `CreateShaderModuleError` not so huge. By @jimblandy in (#3226)[https://github.com/gfx-rs/wgpu/pull/3226] -- Check for invalid bitflag bits in wgpu-core and allow them to be captured/replayed by @nical in (#3229)[https://github.com/gfx-rs/wgpu/pull/3229] +- Remove `wgpu_types::Features::DEPTH24PLUS_STENCIL8`, making `wgpu::TextureFormat::Depth24PlusStencil8` available on all backends. By @Healthire in [#3151](https://github.com/gfx-rs/wgpu/pull/3151) +- Fix an integer overflow in `queue_write_texture` by @nical in [#3146](https://github.com/gfx-rs/wgpu/pull/3146) +- Make `RenderPassCompatibilityError` and `CreateShaderModuleError` not so huge. By @jimblandy in [#3226](https://github.com/gfx-rs/wgpu/pull/3226) +- Check for invalid bitflag bits in wgpu-core and allow them to be captured/replayed by @nical in [#3229](https://github.com/gfx-rs/wgpu/pull/3229) - Evaluate `gfx_select!`'s `#[cfg]` conditions at the right time. By @jimblandy in [#3253](https://github.com/gfx-rs/wgpu/pull/3253) - Improve error messages when binding bind group with dynamic offsets. By @cwfitzgerald in [#3294](https://github.com/gfx-rs/wgpu/pull/3294) - Allow non-filtering sampling of integer textures. By @JMS55 in [#3362](https://github.com/gfx-rs/wgpu/pull/3362). 
@@ -1551,6 +1599,7 @@ By @jimblandy in [#3254](https://github.com/gfx-rs/wgpu/pull/3254). - Make `make_spirv_raw` and `make_spirv` handle big-endian binaries. By @1e1001 in [#3411](https://github.com/gfx-rs/wgpu/pull/3411). #### Vulkan + - Update ash to 0.37.1+1.3.235 to fix CI breaking by changing a call to the deprecated `debug_utils_set_object_name()` function to `set_debug_utils_object_name()` by @elabajaba in [#3273](https://github.com/gfx-rs/wgpu/pull/3273) - Document and improve extension detection. By @teoxoy in [#3327](https://github.com/gfx-rs/wgpu/pull/3327) - Don't use a pointer to a local copy of a `PhysicalDeviceDriverProperties` struct after it has gone out of scope. In fact, don't make a local copy at all. Introduce a helper function for building `CStr`s from C character arrays, and remove some `unsafe` blocks. By @jimblandy in [#3076](https://github.com/gfx-rs/wgpu/pull/3076). @@ -1561,6 +1610,7 @@ By @jimblandy in [#3254](https://github.com/gfx-rs/wgpu/pull/3254). - Don't re-use `GraphicsCommandList` when `close` or `reset` fails. By @xiaopengli89 in [#3204](https://github.com/gfx-rs/wgpu/pull/3204) #### Metal + - Fix texture view creation with full-resource views when using an explicit `mip_level_count` or `array_layer_count`. By @cwfitzgerald in [#3323](https://github.com/gfx-rs/wgpu/pull/3323) #### GLES @@ -1688,7 +1738,7 @@ both `raw_window_handle::HasRawWindowHandle` and `raw_window_handle::HasRawDispl #### Vulkan -- Fix `astc_hdr` formats support by @jinleili in [#2971]](https://github.com/gfx-rs/wgpu/pull/2971) +- Fix `astc_hdr` formats support by @jinleili in [#2971]]() - Update to Naga b209d911 (2022-9-1) to avoid generating SPIR-V that violates Vulkan valid usage rules `VUID-StandaloneSpirv-Flat-06202` and `VUID-StandaloneSpirv-Flat-04744`. 
By @jimblandy in From f7d451671c7ed54e4554f63428ad59d22a09f0bb Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 1 Mar 2024 17:50:16 +0000 Subject: [PATCH 09/31] Add more comprehensive documentation --- wgpu/src/lib.rs | 54 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index fd7225230d..ae83ff1fb2 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1115,16 +1115,64 @@ impl ComputePipeline { /// creating [`RenderPipeline`]s and [`ComputePipeline`]s /// in subsequent executions /// +/// This reuse is only applicable for the same or similar devices. +/// See [`util::pipeline_cache_key`] for some details. +/// +/// # Background +/// +/// In most GPU drivers, shader code must be converted into a machine code +/// which can be executed on the GPU. +/// Generating this machine code can require a lot of computation. +/// Pipeline caches allow this computation to be reused between executions +/// of the program. +/// This can be very useful for reducing program startup time. +/// +/// Note that most desktop GPU drivers will manage their own caches, +/// meaning that little advantage can be gained from this on those platforms. +/// However, on some platforms, especially Android, drivers leave this to the +/// application to implement. +/// +/// Unfortunately, drivers do not expose whether they manage their own caches. +/// Some reasonable policies for applications to use are: +/// - Manage their own pipeline cache on all platforms +/// - Only manage pipeline caches on Android +/// /// # Usage /// -/// TODO +/// It is valid to use this resource when creating multiple pipelines, in +/// which case it will likely cache each of those pipelines. +/// It is also valid to create a new cache for each pipeline. +/// +/// This resource is most useful when the data produced from it (using +/// [`PipelineCache::get_data`]) is persisted. 
+/// Care should be taken that pipeline caches are only used for the same device, +/// as pipeline caches from compatible devices are unlikely to provide any advantage. +/// `util::pipeline_cache_key` can be used as a file/directory name to help ensure that. +/// +/// It is recommended to store pipeline caches atomically. If persisting to disk, +/// this can usually be achieved by creating a temporary file, then moving/[renaming] +/// the temporary file over the existing cache +/// +/// # Storage Usage /// -/// # Memory Usage /// There is not currently an API available to reduce the size of a cache. +/// This is due to limitations in the underlying graphics APIs used. +/// This is especially impactful if your application is being updated, so +/// previous caches are no longer being used. +/// +/// One option to work around this is to regenerate the cache. +/// That is, creating the pipelines which your program runs using +/// with the stored cached data, then recreating the *same* pipelines +/// using a new cache, which your application then store. /// -/// TODO +/// # Implementations +/// +/// This resource currently only works on the following backends: +/// - Vulkan /// /// This type is unique to the Rust API of `wgpu`. 
+/// +/// [renaming]: std::fs::rename #[derive(Debug)] pub struct PipelineCache { context: Arc, From 40f2a85e2bd4909b16e5b96f6cb2c0401b5bea0e Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Mon, 4 Mar 2024 14:30:16 +0000 Subject: [PATCH 10/31] Update documentation and caching use --- wgpu-core/src/device/resource.rs | 33 ++++++++++++++------------------ wgpu/src/lib.rs | 15 +++++++++------ 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index e35355df80..6f5690f5c5 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -2823,15 +2823,14 @@ impl Device { Device::make_late_sized_buffer_groups(&shader_binding_sizes, &pipeline_layout); let cache = if let Some(cache) = desc.cache { - let cache = hub - .pipeline_caches - .get(cache) - .map_err(|_| validation::StageError::InvalidModule)?; - - if cache.device.as_info().id() != self.as_info().id() { - return Err(DeviceError::WrongDevice.into()); + if let Ok(cache) = hub.pipeline_caches.get(cache) { + if cache.device.as_info().id() != self.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + Some(cache) + } else { + None } - Some(cache) } else { None }; @@ -3411,18 +3410,14 @@ impl Device { } let cache = if let Some(cache) = desc.cache { - let cache = hub - .pipeline_caches - .get(cache) - // This is clearly wrong, but I'm just trying to fix the type errors - .map_err(|_| { - pipeline::CreateRenderPipelineError::ConservativeRasterizationNonFillPolygonMode - })?; - - if cache.device.as_info().id() != self.as_info().id() { - return Err(DeviceError::WrongDevice.into()); + if let Ok(cache) = hub.pipeline_caches.get(cache) { + if cache.device.as_info().id() != self.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + Some(cache) + } else { + None } - Some(cache) } else { None }; diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 
ae83ff1fb2..8428d161b0 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -3247,10 +3247,12 @@ impl Device { /// direct uses of backend APIs into this method. /// /// # Errors - /// Returns `None` if this device does not support [`PipelineCache`]. See the - /// documentation on that type for details of API support /// - /// Returns `Some` with an error value if: + /// Returns an error value if: + /// * this device is invalid; or + /// * the device is out of memory + /// + /// This method also returns an error value if: /// * The `fallback` field on `desc` is false; and /// * the `data` provided would not be used[^data_not_used] /// @@ -3288,12 +3290,13 @@ impl Device { /// and [`Device::create_render_pipeline`] to initialise its cache data /// /// # Errors - /// Returns `None` if this device does not support [`PipelineCache`]. See the - /// documentation on that type for details of API support /// - /// Returns `Some` with an error value if: + /// Errors if: /// * this device is invalid; or /// * the device is out of memory + /// + /// If the error handler didn't panic,and an error value is used in + /// subsequent calls, default (driver-provided) caching will be used. 
pub fn create_pipeline_cache(&self, desc: &PipelineCacheDescriptor<'_>) -> PipelineCache { let (id, data) = DynContext::device_create_pipeline_cache( &*self.context, From 66f9d54da00e6c176e3d6ec8e77e2a3d220816bf Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 5 Mar 2024 17:54:41 +0000 Subject: [PATCH 11/31] Add a feature for pipeline caching --- wgpu-core/src/device/resource.rs | 1 + wgpu-core/src/pipeline.rs | 2 ++ wgpu-hal/src/vulkan/adapter.rs | 3 ++- wgpu-types/src/lib.rs | 5 +++++ wgpu/src/lib.rs | 2 ++ 5 files changed, 12 insertions(+), 1 deletion(-) diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 6f5690f5c5..c275029a61 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -3523,6 +3523,7 @@ impl Device { self: &Arc, desc: &pipeline::PipelineCacheDescriptor, ) -> Result, pipeline::CreatePipelineCacheError> { + self.require_features(wgt::Features::PIPELINE_CACHE)?; let mut cache_desc = hal::PipelineCacheDescriptor { data: desc.data.as_deref(), label: desc.label.to_hal(self.instance_flags), diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index bb1d33c1f4..6d921cafbd 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -267,6 +267,8 @@ pub enum CreatePipelineCacheError { Device(#[from] DeviceError), #[error("Pipeline cache validation failed")] Validation, + #[error(transparent)] + MissingFeatures(#[from] MissingFeatures), #[error("Internal error: {0}")] Internal(String), } diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 21219361f4..bf68dccbc0 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -488,7 +488,8 @@ impl PhysicalDeviceFeatures { | F::TIMESTAMP_QUERY_INSIDE_ENCODERS | F::TIMESTAMP_QUERY_INSIDE_PASSES | F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES - | F::CLEAR_TEXTURE; + | F::CLEAR_TEXTURE + | F::PIPELINE_CACHE; let mut 
dl_flags = Df::COMPUTE_SHADERS | Df::BASE_VERTEX diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 7049cd3a8d..e81617451c 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -914,6 +914,11 @@ bitflags::bitflags! { /// /// This is a native only feature. const SUBGROUP_BARRIER = 1 << 58; + /// Allows the use of pipeline cache objects + /// + /// Supported platforms: + /// - Vulkan + const PIPELINE_CACHE = 1 << 56; } } diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 8428d161b0..755048aed4 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -3249,6 +3249,7 @@ impl Device { /// # Errors /// /// Returns an error value if: + /// * the [`PIPELINE_CACHE`](wgt::Features::PIPELINE_CACHE) feature is not enabled /// * this device is invalid; or /// * the device is out of memory /// @@ -3292,6 +3293,7 @@ impl Device { /// # Errors /// /// Errors if: + /// * the [`PIPELINE_CACHE`](wgt::Features::PIPELINE_CACHE) feature is not enabled /// * this device is invalid; or /// * the device is out of memory /// From ceb70a081f402c5650d37cf8b5c4078ea0f3e9f7 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 14 Mar 2024 09:37:25 +0000 Subject: [PATCH 12/31] Address (most) review comments --- wgpu-core/src/device/resource.rs | 21 +++++++++++---------- wgpu-core/src/pipeline.rs | 2 ++ wgpu-hal/src/vulkan/device.rs | 26 ++++++++++++-------------- wgpu/src/lib.rs | 4 ++-- 4 files changed, 27 insertions(+), 26 deletions(-) diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index c275029a61..3aa84041b1 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -2822,17 +2822,18 @@ impl Device { let late_sized_buffer_groups = Device::make_late_sized_buffer_groups(&shader_binding_sizes, &pipeline_layout); - let cache = if let Some(cache) = desc.cache { - if let Ok(cache) = hub.pipeline_caches.get(cache) { - if cache.device.as_info().id() != 
self.as_info().id() { - return Err(DeviceError::WrongDevice.into()); - } - Some(cache) - } else { - None + let cache = 'cache: { + let Some(cache) = desc.cache else { + break 'cache None; + }; + let Ok(cache) = hub.pipeline_caches.get(cache) else { + break 'cache None; + }; + + if cache.device.as_info().id() != self.as_info().id() { + return Err(DeviceError::WrongDevice.into()); } - } else { - None + Some(cache) }; let pipeline_desc = hal::ComputePipelineDescriptor { diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index 6d921cafbd..d6f0fedb50 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -192,6 +192,7 @@ pub struct ComputePipelineDescriptor<'a> { pub layout: Option, /// The compiled compute stage and its entry point. pub stage: ProgrammableStageDescriptor<'a>, + /// The pipeline cache to use when creating this pipeline. pub cache: Option, } @@ -379,6 +380,7 @@ pub struct RenderPipelineDescriptor<'a> { /// If the pipeline will be used with a multiview render pass, this indicates how many array /// layers the attachments will have. pub multiview: Option, + /// The pipeline cache to use when creating this pipeline. pub cache: Option, } diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 1dc2ff2215..31ffec56f5 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1863,18 +1863,17 @@ impl crate::Device for super::Device { .build() }]; + let pipeline_cache = desc + .cache + .map(|it| it.raw) + .unwrap_or(vk::PipelineCache::null()); + let mut raw_vec = { profiling::scope!("vkCreateGraphicsPipelines"); unsafe { self.shared .raw - .create_graphics_pipelines( - desc.cache - .map(|it| it.raw) - .unwrap_or(vk::PipelineCache::null()), - &vk_infos, - None, - ) + .create_graphics_pipelines(pipeline_cache, &vk_infos, None) .map_err(|(_, e)| crate::DeviceError::from(e)) }? 
}; @@ -1921,18 +1920,17 @@ impl crate::Device for super::Device { .build() }]; + let pipeline_cache = desc + .cache + .map(|it| it.raw) + .unwrap_or(vk::PipelineCache::null()); + let mut raw_vec = { profiling::scope!("vkCreateComputePipelines"); unsafe { self.shared .raw - .create_compute_pipelines( - desc.cache - .map(|it| it.raw) - .unwrap_or(vk::PipelineCache::null()), - &vk_infos, - None, - ) + .create_compute_pipelines(pipeline_cache, &vk_infos, None) .map_err(|(_, e)| crate::DeviceError::from(e)) }? }; diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 755048aed4..d873352286 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1933,7 +1933,7 @@ pub struct RenderPipelineDescriptor<'a> { /// If the pipeline will be used with a multiview render pass, this indicates how many array /// layers the attachments will have. pub multiview: Option, - /// The pipeline cache to use for this operation + /// The pipeline cache to use when creating this pipeline. pub cache: Option<&'a PipelineCache>, } #[cfg(send_sync)] @@ -2028,7 +2028,7 @@ pub struct ComputePipelineDescriptor<'a> { /// The name of the entry point in the compiled shader. There must be a function with this name /// and no return value in the shader. pub entry_point: &'a str, - /// The pipeline cache to use when creating this pipeline + /// The pipeline cache to use when creating this pipeline. 
pub cache: Option<&'a PipelineCache>, /// Advanced options for when this pipeline is compiled /// From 026edf5102cdb75ee74b1738c32df262bb633751 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 14 Mar 2024 17:50:32 +0000 Subject: [PATCH 13/31] Sketch out some API for validation --- wgpu-core/src/lib.rs | 1 + wgpu-core/src/pipeline_cache.rs | 186 ++++++++++++++++++++++++++++++++ 2 files changed, 187 insertions(+) create mode 100644 wgpu-core/src/pipeline_cache.rs diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs index 032d85a4bc..ebf80091c3 100644 --- a/wgpu-core/src/lib.rs +++ b/wgpu-core/src/lib.rs @@ -65,6 +65,7 @@ mod init_tracker; pub mod instance; mod lock; pub mod pipeline; +mod pipeline_cache; mod pool; pub mod present; pub mod registry; diff --git a/wgpu-core/src/pipeline_cache.rs b/wgpu-core/src/pipeline_cache.rs new file mode 100644 index 0000000000..9a8195d2ad --- /dev/null +++ b/wgpu-core/src/pipeline_cache.rs @@ -0,0 +1,186 @@ +use std::io::Write; + +use wgt::AdapterInfo; + +const MAGIC: [u8; 8] = *b"WGPUPLCH"; +const HEADER_VERSION: u32 = 1; +const ABI: u32 = std::mem::size_of::<*const ()>() as u32; + +#[repr(C)] +struct PipelineCacheHeader { + /// The magic header to ensure that we have the right file format + /// Has a value of MAGIC, as above + magic: [u8; 8], + // /// The total size of this header, in bytes + // header_size: u32, + /// The version of this wgpu header + /// Should be equal to HEADER_VERSION above + /// + /// This must always be the second item, after the value above + header_version: u32, + /// The number of bytes in the pointers of this ABI, because some drivers + /// have previously not distinguished between their 32 bit and 64 bit drivers + /// leading to Vulkan data corruption + cache_abi: u32, + /// The id for the backend in use, from [wgt::Backend] + backend: u8, + /// The hash key which identifiers the device/adapter. 
+ /// This is used to validate that this pipeline cache (probably) was produced for + /// the expected device. + /// On Vulkan: it is a combination of vendor ID and device ID + adapter_key: [u8; 15], + /// A key used to validate that this device is still compatible with the cache + /// + /// This should e.g. contain driver version and/or intermediate compiler versions + device_key: [u8; 16], + /// The length of the data which is sent to/recieved from the backend + data_size: u64, + /// The hash of the data which is sent to/recieved from the backend, and which + /// follows this header. That should be the remainder of the memory + data_hash: u64, +} + +pub enum PipelineCacheValidationError { + Truncated, + Corrupted, + Outdated, + WrongDevice, + Unsupported, +} + +impl PipelineCacheValidationError { + /// Could the error have been avoided? + pub fn was_avoidable(&self) -> bool { + match self { + PipelineCacheValidationError::WrongDevice + | PipelineCacheValidationError::Unsupported => true, + PipelineCacheValidationError::Truncated + | PipelineCacheValidationError::Outdated + | PipelineCacheValidationError::Corrupted => false, + } + } +} + +/// Validate the data in a pipeline cache +pub fn validate_pipeline_cache<'d>( + cache_data: &'d [u8], + adapter: &AdapterInfo, + device_key: [u8; 16], +) -> Result<&'d [u8], PipelineCacheValidationError> { + let adapter_key = adapter_key(adapter)?; + let Some((header, remaining_data)) = read_header(cache_data) else { + return Err(PipelineCacheValidationError::Truncated); + }; + if header.magic != MAGIC { + return Err(PipelineCacheValidationError::Corrupted); + } + if header.header_version != HEADER_VERSION { + return Err(PipelineCacheValidationError::Outdated); + } + if header.cache_abi != ABI { + return Err(PipelineCacheValidationError::Outdated); + } + if header.backend != adapter.backend as u8 { + return Err(PipelineCacheValidationError::WrongDevice); + } + if header.adapter_key != adapter_key { + return 
Err(PipelineCacheValidationError::WrongDevice); + } + if header.device_key != device_key { + return Err(PipelineCacheValidationError::WrongDevice); + } + let data_size: usize = header + .data_size + .try_into() + // If the data was previously more than 4GiB, and we're on the same size of system (ABI, above)l + // Then the data must be corrupted + .map_err(|_| PipelineCacheValidationError::Corrupted)?; + if data_size != remaining_data.len() { + return Err(PipelineCacheValidationError::WrongDevice); + } + Ok(remaining_data) +} + +fn adapter_key(adapter: &AdapterInfo) -> Result<[u8; 15], PipelineCacheValidationError> { + match adapter.backend { + wgt::Backend::Vulkan => { + // If these change size, the header format needs to change + // We set the type explicitly so this won't compile in that case + let v: [u8; 4] = adapter.vendor.to_be_bytes(); + let d: [u8; 4] = adapter.device.to_be_bytes(); + let adapter = [ + 255, 255, 255, v[0], v[1], v[2], v[3], d[0], d[1], d[2], d[3], 255, 255, 255, 255, + ]; + Ok(adapter) + } + _ => Err(PipelineCacheValidationError::Unsupported), + } +} + +pub fn write_pipeline_cache(writer: &mut dyn Write, data: &[u8], adpater: &AdapterInfo) {} + +fn read_header(data: &[u8]) -> Option<(PipelineCacheHeader, &[u8])> { + let mut reader = Reader { + data, + total_read: 0, + }; + let magic = reader.read_array()?; + let header_version = reader.read_u32()?; + let cache_abi = reader.read_u32()?; + let backend = reader.read_byte()?; + let adapter_key = reader.read_array()?; + let device_key = reader.read_array()?; + let data_size = reader.read_u64()?; + let data_hash = reader.read_u64()?; + + assert_eq!( + reader.total_read, + std::mem::size_of::() + ); + + Some(( + PipelineCacheHeader { + magic, + header_version, + cache_abi, + backend, + adapter_key, + device_key, + data_size, + data_hash, + }, + reader.data, + )) +} + +fn write_header(header: PipelineCacheHeader, writer: &mut dyn Write) {} + +struct Reader<'a> { + data: &'a [u8], + total_read: 
usize, +} + +impl<'a> Reader<'a> { + fn read_byte(&mut self) -> Option { + let res = *self.data.get(0)?; + self.total_read += 1; + self.data = &self.data[1..]; + Some(res) + } + fn read_array(&mut self) -> Option<[u8; N]> { + let (start, data) = self.data.split_at(N); + self.total_read += N; + self.data = data; + start.try_into().ok() + } + + fn read_u16(&mut self) -> Option { + self.read_array().map(u16::from_be_bytes) + } + fn read_u32(&mut self) -> Option { + self.read_array().map(u32::from_be_bytes) + } + fn read_u64(&mut self) -> Option { + self.read_array().map(u64::from_be_bytes) + } +} From 79693877025da923df0e053f36362b1214a0c296 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 15 Mar 2024 18:05:59 +0000 Subject: [PATCH 14/31] Sketch out the cache header API fully --- wgpu-core/src/pipeline.rs | 3 +- wgpu-core/src/pipeline_cache.rs | 508 ++++++++++++++++++++++++++------ 2 files changed, 426 insertions(+), 85 deletions(-) diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index d6f0fedb50..2e7c6096bd 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -1,5 +1,6 @@ #[cfg(feature = "trace")] use crate::device::trace; +pub use crate::pipeline_cache::PipelineCacheValidationError; use crate::{ binding_model::{CreateBindGroupLayoutError, CreatePipelineLayoutError, PipelineLayout}, command::ColorAttachmentError, @@ -267,7 +268,7 @@ pub enum CreatePipelineCacheError { #[error(transparent)] Device(#[from] DeviceError), #[error("Pipeline cache validation failed")] - Validation, + Validation(PipelineCacheValidationError), #[error(transparent)] MissingFeatures(#[from] MissingFeatures), #[error("Internal error: {0}")] diff --git a/wgpu-core/src/pipeline_cache.rs b/wgpu-core/src/pipeline_cache.rs index 9a8195d2ad..b03c1392c7 100644 --- a/wgpu-core/src/pipeline_cache.rs +++ b/wgpu-core/src/pipeline_cache.rs @@ -1,60 +1,36 @@ -use std::io::Write; - +use thiserror::Error; use 
wgt::AdapterInfo; -const MAGIC: [u8; 8] = *b"WGPUPLCH"; -const HEADER_VERSION: u32 = 1; -const ABI: u32 = std::mem::size_of::<*const ()>() as u32; - -#[repr(C)] -struct PipelineCacheHeader { - /// The magic header to ensure that we have the right file format - /// Has a value of MAGIC, as above - magic: [u8; 8], - // /// The total size of this header, in bytes - // header_size: u32, - /// The version of this wgpu header - /// Should be equal to HEADER_VERSION above - /// - /// This must always be the second item, after the value above - header_version: u32, - /// The number of bytes in the pointers of this ABI, because some drivers - /// have previously not distinguished between their 32 bit and 64 bit drivers - /// leading to Vulkan data corruption - cache_abi: u32, - /// The id for the backend in use, from [wgt::Backend] - backend: u8, - /// The hash key which identifiers the device/adapter. - /// This is used to validate that this pipeline cache (probably) was produced for - /// the expected device. - /// On Vulkan: it is a combination of vendor ID and device ID - adapter_key: [u8; 15], - /// A key used to validate that this device is still compatible with the cache - /// - /// This should e.g. contain driver version and/or intermediate compiler versions - device_key: [u8; 16], - /// The length of the data which is sent to/recieved from the backend - data_size: u64, - /// The hash of the data which is sent to/recieved from the backend, and which - /// follows this header. That should be the remainder of the memory - data_hash: u64, -} +pub const HEADER_LENGTH: usize = std::mem::size_of::(); +#[derive(Debug, PartialEq, Eq, Clone, Error)] +#[non_exhaustive] pub enum PipelineCacheValidationError { + #[error("The pipeline cache data truncataed")] Truncated, + #[error("The pipeline cache data was longer than recorded")] + // TODO: Is it plausible that this would happen + Extended, + #[error("The pipeline cache data was corrupted (e.g. 
the hash didn't match)")] Corrupted, + #[error("The pipeline cacha data was out of date and so cannot be safely used")] Outdated, + #[error("The cache data was created for a different device")] WrongDevice, + #[error("Pipeline cacha data was created for a future version of wgpu")] Unsupported, } impl PipelineCacheValidationError { /// Could the error have been avoided? + /// That is, is there a mistake in user code interacting with the cache pub fn was_avoidable(&self) -> bool { match self { - PipelineCacheValidationError::WrongDevice - | PipelineCacheValidationError::Unsupported => true, + PipelineCacheValidationError::WrongDevice => true, PipelineCacheValidationError::Truncated + | PipelineCacheValidationError::Unsupported + | PipelineCacheValidationError::Extended + // It's unusual, but not implausible, to be downgrading wgpu | PipelineCacheValidationError::Outdated | PipelineCacheValidationError::Corrupted => false, } @@ -65,10 +41,10 @@ impl PipelineCacheValidationError { pub fn validate_pipeline_cache<'d>( cache_data: &'d [u8], adapter: &AdapterInfo, - device_key: [u8; 16], + validation_key: [u8; 16], ) -> Result<&'d [u8], PipelineCacheValidationError> { let adapter_key = adapter_key(adapter)?; - let Some((header, remaining_data)) = read_header(cache_data) else { + let Some((header, remaining_data)) = PipelineCacheHeader::read(cache_data) else { return Err(PipelineCacheValidationError::Truncated); }; if header.magic != MAGIC { @@ -86,21 +62,146 @@ pub fn validate_pipeline_cache<'d>( if header.adapter_key != adapter_key { return Err(PipelineCacheValidationError::WrongDevice); } - if header.device_key != device_key { - return Err(PipelineCacheValidationError::WrongDevice); + if header.validation_key != validation_key { + // If the validation key is wrong, that means that this device has changed + // in a way where the cache won't be compatible since the cache was made, + // so it is outdated + return Err(PipelineCacheValidationError::Outdated); } let data_size: 
usize = header .data_size .try_into() - // If the data was previously more than 4GiB, and we're on the same size of system (ABI, above)l + // If the data was previously more than 4GiB, and we're still on a 32 bit system (ABI check, above) // Then the data must be corrupted .map_err(|_| PipelineCacheValidationError::Corrupted)?; - if data_size != remaining_data.len() { - return Err(PipelineCacheValidationError::WrongDevice); + if remaining_data.len() < data_size { + return Err(PipelineCacheValidationError::Truncated); + } + if remaining_data.len() > data_size { + return Err(PipelineCacheValidationError::Extended); + } + let hash = hash(remaining_data); + if header.data_hash != hash { + return Err(PipelineCacheValidationError::Corrupted); } Ok(remaining_data) } +pub fn add_cache_header( + in_region: &mut [u8], + data: &[u8], + adapter: &AdapterInfo, + validation_key: [u8; 16], +) { + assert_eq!(in_region.len(), HEADER_LENGTH); + let data_hash = hash(&data); + let header = PipelineCacheHeader { + adapter_key: adapter_key(adapter) + .expect("Called add_cache_header for an adapter which doesn't support cache data. 
This is a wgpu internal bug"), + backend: adapter.backend as u8, + cache_abi: ABI, + magic: MAGIC, + header_version: HEADER_VERSION, + validation_key, + data_hash, + data_size: data + .len() + .try_into() + .expect("Cache larger than u64::MAX bytes"), + }; + header.write(in_region); +} + +const MAGIC: [u8; 8] = *b"WGPUPLCH"; +const HEADER_VERSION: u32 = 1; +const ABI: u32 = std::mem::size_of::<*const ()>() as u32; + +#[repr(C)] +#[derive(PartialEq, Eq)] +struct PipelineCacheHeader { + /// The magic header to ensure that we have the right file format + /// Has a value of MAGIC, as above + magic: [u8; 8], + // /// The total size of this header, in bytes + // header_size: u32, + /// The version of this wgpu header + /// Should be equal to HEADER_VERSION above + /// + /// This must always be the second item, after the value above + header_version: u32, + /// The number of bytes in the pointers of this ABI, because some drivers + /// have previously not distinguished between their 32 bit and 64 bit drivers + /// leading to Vulkan data corruption + cache_abi: u32, + /// The id for the backend in use, from [wgt::Backend] + backend: u8, + /// The hash key which identifiers the device/adapter. + /// This is used to validate that this pipeline cache (probably) was produced for + /// the expected device. + /// On Vulkan: it is a combination of vendor ID and device ID + adapter_key: [u8; 15], + /// A key used to validate that this device is still compatible with the cache + /// + /// This should e.g. contain driver version and/or intermediate compiler versions + validation_key: [u8; 16], + /// The length of the data which is sent to/recieved from the backend + data_size: u64, + /// The hash of the data which is sent to/recieved from the backend, and which + /// follows this header. 
That should be the remainder of the memory + data_hash: u64, +} + +impl PipelineCacheHeader { + fn read(data: &[u8]) -> Option<(PipelineCacheHeader, &[u8])> { + let mut reader = Reader { + data, + total_read: 0, + }; + let magic = reader.read_array()?; + let header_version = reader.read_u32()?; + let cache_abi = reader.read_u32()?; + let backend = reader.read_byte()?; + let adapter_key = reader.read_array()?; + let validation_key = reader.read_array()?; + let data_size = reader.read_u64()?; + let data_hash = reader.read_u64()?; + + assert_eq!( + reader.total_read, + std::mem::size_of::() + ); + + Some(( + PipelineCacheHeader { + magic, + header_version, + cache_abi, + backend, + adapter_key, + validation_key, + data_size, + data_hash, + }, + reader.data, + )) + } + + fn write(&self, into: &mut [u8]) -> Option<()> { + let mut writer = Writer { data: into }; + writer.write_array(&self.magic)?; + writer.write_u32(self.header_version)?; + writer.write_u32(self.cache_abi)?; + writer.write_byte(self.backend)?; + writer.write_array(&self.adapter_key)?; + writer.write_array(&self.validation_key)?; + writer.write_u64(self.data_size)?; + writer.write_u64(self.data_hash)?; + + assert_eq!(writer.data.len(), 0); + Some(()) + } +} + fn adapter_key(adapter: &AdapterInfo) -> Result<[u8; 15], PipelineCacheValidationError> { match adapter.backend { wgt::Backend::Vulkan => { @@ -117,44 +218,15 @@ fn adapter_key(adapter: &AdapterInfo) -> Result<[u8; 15], PipelineCacheValidatio } } -pub fn write_pipeline_cache(writer: &mut dyn Write, data: &[u8], adpater: &AdapterInfo) {} - -fn read_header(data: &[u8]) -> Option<(PipelineCacheHeader, &[u8])> { - let mut reader = Reader { - data, - total_read: 0, - }; - let magic = reader.read_array()?; - let header_version = reader.read_u32()?; - let cache_abi = reader.read_u32()?; - let backend = reader.read_byte()?; - let adapter_key = reader.read_array()?; - let device_key = reader.read_array()?; - let data_size = reader.read_u64()?; - let data_hash 
= reader.read_u64()?; - - assert_eq!( - reader.total_read, - std::mem::size_of::() +fn hash(data: &[u8]) -> u64 { + log::warn!( + "Using fake 'hash' for {} bytes of data. Data might become invalid", + data.len() ); - - Some(( - PipelineCacheHeader { - magic, - header_version, - cache_abi, - backend, - adapter_key, - device_key, - data_size, - data_hash, - }, - reader.data, - )) + // TODO: Actually do a proper hash + 0xFEDCBA9_876543210 } -fn write_header(header: PipelineCacheHeader, writer: &mut dyn Write) {} - struct Reader<'a> { data: &'a [u8], total_read: usize, @@ -168,10 +240,14 @@ impl<'a> Reader<'a> { Some(res) } fn read_array(&mut self) -> Option<[u8; N]> { + // Only greater than because we're indexing fenceposts, not items + if N > self.data.len() { + return None; + } let (start, data) = self.data.split_at(N); self.total_read += N; self.data = data; - start.try_into().ok() + Some(start.try_into().expect("off-by-one-error in array size")) } fn read_u16(&mut self) -> Option { @@ -184,3 +260,267 @@ impl<'a> Reader<'a> { self.read_array().map(u64::from_be_bytes) } } + +struct Writer<'a> { + data: &'a mut [u8], +} + +impl<'a> Writer<'a> { + fn write_byte(&mut self, byte: u8) -> Option<()> { + self.write_array(&[byte]) + } + fn write_array(&mut self, array: &[u8; N]) -> Option<()> { + // Only greater than because we're indexing fenceposts, not items + if N > self.data.len() { + return None; + } + let data = std::mem::replace(&mut self.data, &mut []); + let (start, data) = data.split_at_mut(N); + self.data = data; + start.copy_from_slice(array); + Some(()) + } + + fn write_u16(&mut self, value: u16) -> Option<()> { + self.write_array(&value.to_be_bytes()) + } + fn write_u32(&mut self, value: u32) -> Option<()> { + self.write_array(&value.to_be_bytes()) + } + fn write_u64(&mut self, value: u64) -> Option<()> { + self.write_array(&value.to_be_bytes()) + } +} + +#[cfg(test)] +mod tests { + use wgt::AdapterInfo; + + use 
crate::pipeline_cache::{PipelineCacheValidationError as E, HEADER_LENGTH}; + + use super::ABI; + + // Assert the correct size + const _: [(); HEADER_LENGTH] = [(); 64]; + + const ADAPTER: AdapterInfo = AdapterInfo { + name: String::new(), + vendor: 0x0002_FEED, + device: 0xFEFE_FEFE, + device_type: wgt::DeviceType::Other, + driver: String::new(), + driver_info: String::new(), + backend: wgt::Backend::Vulkan, + }; + + // IMPORTANT: If these tests fail, then you MUST increment HEADER_VERSION + const VALIDATION_KEY: [u8; 16] = u128::to_be_bytes(0xFFFFFFFF_FFFFFFFF_88888888_88888888); + #[test] + fn written_header() { + let mut result = [0; HEADER_LENGTH]; + super::add_cache_header(&mut result, &[], &ADAPTER, VALIDATION_KEY); + let cache: [[u8; 8]; HEADER_LENGTH / 8] = [ + *b"WGPUPLCH", // MAGIC + [0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI + [1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key + [0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key + 0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key + 0x88888888_88888888u64.to_be_bytes(), // Validation key + 0x0u64.to_be_bytes(), // Data size + 0xFEDCBA9_876543210u64.to_be_bytes(), // Hash + ]; + let expected = cache.into_iter().flatten().collect::>(); + + assert_eq!(result.as_slice(), expected.as_slice()); + } + + #[test] + fn valid_data() { + let cache: [[u8; 8]; HEADER_LENGTH / 8] = [ + *b"WGPUPLCH", // MAGIC + [0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI + [1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key + [0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key + 0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key + 0x88888888_88888888u64.to_be_bytes(), // Validation key + 0x0u64.to_be_bytes(), // Data size + 0xFEDCBA9_876543210u64.to_be_bytes(), // Hash + ]; + let cache = cache.into_iter().flatten().collect::>(); + let expected: &[u8] = &[]; + let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, 
VALIDATION_KEY); + assert_eq!(validation_result, Ok(expected)); + } + #[test] + fn invalid_magic() { + let cache: [[u8; 8]; HEADER_LENGTH / 8] = [ + *b"NOT_WGPU", // (Wrong) MAGIC + [0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI + [1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key + [0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key + 0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key + 0x88888888_88888888u64.to_be_bytes(), // Validation key + 0x0u64.to_be_bytes(), // Data size + 0xFEDCBA9_876543210u64.to_be_bytes(), // Hash + ]; + let cache = cache.into_iter().flatten().collect::>(); + let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY); + assert_eq!(validation_result, Err(E::Corrupted)); + } + + #[test] + fn wrong_version() { + let cache: [[u8; 8]; HEADER_LENGTH / 8] = [ + *b"WGPUPLCH", // MAGIC + [0, 0, 0, 2, 0, 0, 0, ABI as u8], // (wrong) Version and ABI + [1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key + [0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key + 0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key + 0x88888888_88888888u64.to_be_bytes(), // Validation key + 0x0u64.to_be_bytes(), // Data size + 0xFEDCBA9_876543210u64.to_be_bytes(), // Hash + ]; + let cache = cache.into_iter().flatten().collect::>(); + let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY); + assert_eq!(validation_result, Err(E::Outdated)); + } + #[test] + fn wrong_abi() { + let cache: [[u8; 8]; HEADER_LENGTH / 8] = [ + *b"WGPUPLCH", // MAGIC + // a 14 bit ABI is improbable + [0, 0, 0, 1, 0, 0, 0, 14], // Version and (wrong) ABI + [1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key + [0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key + 0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key + 0x88888888_88888888u64.to_be_bytes(), // Validation key + 0x0u64.to_be_bytes(), // Data size + 
0xFEDCBA9_876543210u64.to_be_bytes(), // Header + ]; + let cache = cache.into_iter().flatten().collect::>(); + let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY); + assert_eq!(validation_result, Err(E::Outdated)); + } + + #[test] + fn wrong_backend() { + let cache: [[u8; 8]; HEADER_LENGTH / 8] = [ + *b"WGPUPLCH", // MAGIC + [0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI + [2, 255, 255, 255, 0, 2, 0xFE, 0xED], // (wrong) Backend and Adapter key + [0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key + 0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key + 0x88888888_88888888u64.to_be_bytes(), // Validation key + 0x0u64.to_be_bytes(), // Data size + 0xFEDCBA9_876543210u64.to_be_bytes(), // Hash + ]; + let cache = cache.into_iter().flatten().collect::>(); + let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY); + assert_eq!(validation_result, Err(E::WrongDevice)); + } + #[test] + fn wrong_adapter() { + let cache: [[u8; 8]; HEADER_LENGTH / 8] = [ + *b"WGPUPLCH", // MAGIC + [0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI + [1, 255, 255, 255, 0, 2, 0xFE, 0x00], // Backend and (wrong) Adapter key + [0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key + 0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key + 0x88888888_88888888u64.to_be_bytes(), // Validation key + 0x0u64.to_be_bytes(), // Data size + 0xFEDCBA9_876543210u64.to_be_bytes(), // Hash + ]; + let cache = cache.into_iter().flatten().collect::>(); + let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY); + assert_eq!(validation_result, Err(E::WrongDevice)); + } + #[test] + fn wrong_validation() { + let cache: [[u8; 8]; HEADER_LENGTH / 8] = [ + *b"WGPUPLCH", // MAGIC + [0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI + [1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key + [0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter 
key + 0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key + 0x88888888_00000000u64.to_be_bytes(), // (wrong) Validation key + 0x0u64.to_be_bytes(), // Data size + 0xFEDCBA9_876543210u64.to_be_bytes(), // Hash + ]; + let cache = cache.into_iter().flatten().collect::>(); + let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY); + assert_eq!(validation_result, Err(E::Outdated)); + } + #[test] + fn too_little_data() { + let cache: [[u8; 8]; HEADER_LENGTH / 8] = [ + *b"WGPUPLCH", // MAGIC + [0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI + [1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key + [0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key + 0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key + 0x88888888_88888888u64.to_be_bytes(), // Validation key + 0x064u64.to_be_bytes(), // Data size + 0xFEDCBA9_876543210u64.to_be_bytes(), // Hash + ]; + let cache = cache.into_iter().flatten().collect::>(); + let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY); + assert_eq!(validation_result, Err(E::Truncated)); + } + #[test] + fn not_no_data() { + let cache: [[u8; 8]; HEADER_LENGTH / 8] = [ + *b"WGPUPLCH", // MAGIC + [0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI + [1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key + [0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key + 0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key + 0x88888888_88888888u64.to_be_bytes(), // Validation key + 100u64.to_be_bytes(), // Data size + 0xFEDCBA9_876543210u64.to_be_bytes(), // Hash + ]; + let cache = cache + .into_iter() + .flatten() + .chain(std::iter::repeat(0u8).take(100)) + .collect::>(); + let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY); + let expected: &[u8] = &[0; 100]; + assert_eq!(validation_result, Ok(expected)); + } + #[test] + fn too_much_data() { + let cache: [[u8; 8]; HEADER_LENGTH / 
8] = [ + *b"WGPUPLCH", // MAGIC + [0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI + [1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key + [0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key + 0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key + 0x88888888_88888888u64.to_be_bytes(), // Validation key + 0x064u64.to_be_bytes(), // Data size + 0xFEDCBA9_876543210u64.to_be_bytes(), // Hash + ]; + let cache = cache + .into_iter() + .flatten() + .chain(std::iter::repeat(0u8).take(200)) + .collect::>(); + let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY); + assert_eq!(validation_result, Err(E::Extended)); + } + #[test] + fn wrong_hash() { + let cache: [[u8; 8]; HEADER_LENGTH / 8] = [ + *b"WGPUPLCH", // MAGIC + [0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI + [1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key + [0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key + 0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key + 0x88888888_88888888u64.to_be_bytes(), // Validation key + 0x0u64.to_be_bytes(), // Data size + 0x00000000_00000000u64.to_be_bytes(), // Hash + ]; + let cache = cache.into_iter().flatten().collect::>(); + let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY); + assert_eq!(validation_result, Err(E::Corrupted)); + } +} From 824feab6d2d374050623ce78dd09f1bb9503d2fe Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Mon, 18 Mar 2024 10:55:26 +0000 Subject: [PATCH 15/31] Integrate cache validation --- wgpu-core/src/device/global.rs | 20 +++++++++++++++++++- wgpu-core/src/device/resource.rs | 28 ++++++++++++++++++++++------ wgpu-core/src/pipeline.rs | 3 +-- wgpu-core/src/pipeline_cache.rs | 12 ++++++------ wgpu-hal/src/lib.rs | 5 +++-- wgpu-hal/src/vulkan/adapter.rs | 14 ++++++++++++++ wgpu-hal/src/vulkan/device.rs | 3 +++ wgpu-hal/src/vulkan/mod.rs | 1 + 8 files 
changed, 69 insertions(+), 17 deletions(-) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 1526f747aa..b48ff5def5 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -2335,6 +2335,7 @@ impl Global { } pub fn pipeline_cache_get_data(&self, id: id::PipelineCacheId) -> Option> { + use crate::pipeline_cache; api_log!("PipelineCache::get_data"); let hub = A::hub(self); @@ -2344,7 +2345,24 @@ impl Global { return None; } if let Some(raw_cache) = cache.raw.as_ref() { - return unsafe { cache.device.raw().pipeline_cache_get_data(raw_cache) }; + let vec = unsafe { cache.device.raw().pipeline_cache_get_data(raw_cache) }; + let Some(mut vec) = vec else { return None }; + let Some(validation_key) = cache.device.raw().pipeline_cache_validation_key() + else { + return None; + }; + let mut header_contents = [0; pipeline_cache::HEADER_LENGTH]; + pipeline_cache::add_cache_header( + &mut header_contents, + &vec, + &cache.device.adapter.raw.info, + validation_key, + ); + + let deleted = vec.splice(..1, header_contents).collect::>(); + debug_assert!(deleted.is_empty()); + + return Some(vec); } } None diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 3aa84041b1..ea3facb44f 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -3524,17 +3524,33 @@ impl Device { self: &Arc, desc: &pipeline::PipelineCacheDescriptor, ) -> Result, pipeline::CreatePipelineCacheError> { + use crate::pipeline_cache; self.require_features(wgt::Features::PIPELINE_CACHE)?; - let mut cache_desc = hal::PipelineCacheDescriptor { - data: desc.data.as_deref(), + let data = if let Some((data, validation_key)) = desc + .data + .as_ref() + .zip(self.raw().pipeline_cache_validation_key()) + { + let data = pipeline_cache::validate_pipeline_cache( + &data, + &self.adapter.raw.info, + validation_key, + ); + match data { + Ok(data) => Some(data), + Err(e) if e.was_avoidable() || 
!desc.fallback => return Err(e.into()), + // If the error was unavoidable and we are asked to fallback, do so + Err(_) => None, + } + } else { + None + }; + let cache_desc = hal::PipelineCacheDescriptor { + data, label: desc.label.to_hal(self.instance_flags), }; let raw = match unsafe { self.raw().create_pipeline_cache(&cache_desc) } { Ok(raw) => raw, - Err(hal::PipelineCacheError::Validation) if desc.fallback => { - debug_assert!(cache_desc.data.take().is_some()); - unsafe { self.raw().create_pipeline_cache(&cache_desc)? } - } Err(e) => return Err(e.into()), }; let cache = pipeline::PipelineCache { diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index 2e7c6096bd..d02915fa64 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -268,7 +268,7 @@ pub enum CreatePipelineCacheError { #[error(transparent)] Device(#[from] DeviceError), #[error("Pipeline cache validation failed")] - Validation(PipelineCacheValidationError), + Validation(#[from] PipelineCacheValidationError), #[error(transparent)] MissingFeatures(#[from] MissingFeatures), #[error("Internal error: {0}")] @@ -281,7 +281,6 @@ impl From for CreatePipelineCacheError { hal::PipelineCacheError::Device(device) => { CreatePipelineCacheError::Device(device.into()) } - hal::PipelineCacheError::Validation => CreatePipelineCacheError::Validation, } } } diff --git a/wgpu-core/src/pipeline_cache.rs b/wgpu-core/src/pipeline_cache.rs index b03c1392c7..191c870c4c 100644 --- a/wgpu-core/src/pipeline_cache.rs +++ b/wgpu-core/src/pipeline_cache.rs @@ -250,9 +250,9 @@ impl<'a> Reader<'a> { Some(start.try_into().expect("off-by-one-error in array size")) } - fn read_u16(&mut self) -> Option { - self.read_array().map(u16::from_be_bytes) - } + // fn read_u16(&mut self) -> Option { + // self.read_array().map(u16::from_be_bytes) + // } fn read_u32(&mut self) -> Option { self.read_array().map(u32::from_be_bytes) } @@ -281,9 +281,9 @@ impl<'a> Writer<'a> { Some(()) } - fn write_u16(&mut self, 
value: u16) -> Option<()> { - self.write_array(&value.to_be_bytes()) - } + // fn write_u16(&mut self, value: u16) -> Option<()> { + // self.write_array(&value.to_be_bytes()) + // } fn write_u32(&mut self, value: u32) -> Option<()> { self.write_array(&value.to_be_bytes()) } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 7e2803b743..770147edfb 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -336,8 +336,6 @@ pub enum PipelineError { pub enum PipelineCacheError { #[error(transparent)] Device(#[from] DeviceError), - #[error("Pipeline cache had a validation error")] - Validation, } #[derive(Clone, Debug, Eq, PartialEq, Error)] @@ -624,6 +622,9 @@ pub trait Device: WasmNotSendSync { &self, desc: &PipelineCacheDescriptor<'_>, ) -> Result; + fn pipeline_cache_validation_key(&self) -> Option<[u8; 16]> { + None + } unsafe fn destroy_pipeline_cache(&self, cache: A::PipelineCache); unsafe fn create_query_set( diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index bf68dccbc0..4c9e708e0c 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -1776,6 +1776,19 @@ impl super::Adapter { unsafe { raw_device.get_device_queue(family_index, queue_index) } }; + let driver_version = self + .phd_capabilities + .properties + .driver_version + .to_be_bytes(); + #[rustfmt::skip] + let pipeline_cache_validation_key = [ + driver_version[0], driver_version[1], driver_version[2], driver_version[3], + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + ]; + let shared = Arc::new(super::DeviceShared { raw: raw_device, family_index, @@ -1790,6 +1803,7 @@ impl super::Adapter { timeline_semaphore: timeline_semaphore_fn, ray_tracing: ray_tracing_fns, }, + pipeline_cache_validation_key, vendor_id: self.phd_capabilities.properties.vendor_id, timestamp_period: self.phd_capabilities.properties.limits.timestamp_period, private_caps: self.private_caps.clone(), diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs 
index 31ffec56f5..4dea6fe4d9 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1968,6 +1968,9 @@ impl crate::Device for super::Device { Ok(PipelineCache { raw }) } + fn pipeline_cache_validation_key(&self) -> Option<[u8; 16]> { + Some(self.shared.pipeline_cache_validation_key) + } unsafe fn destroy_pipeline_cache(&self, cache: PipelineCache) { unsafe { self.shared.raw.destroy_pipeline_cache(cache.raw, None) } } diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 53e7dfbf5a..7d1d377c4b 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -342,6 +342,7 @@ struct DeviceShared { enabled_extensions: Vec<&'static CStr>, extension_fns: DeviceExtensionFunctions, vendor_id: u32, + pipeline_cache_validation_key: [u8; 16], timestamp_period: f32, private_caps: PrivateCapabilities, workarounds: Workarounds, From fea8a79f5e2751d9fbe5c1e3367d7dbf0e70f361 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Mon, 18 Mar 2024 11:00:54 +0000 Subject: [PATCH 16/31] =?UTF-8?q?=F0=9F=93=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wgpu-core/src/device/global.rs | 9 +++------ wgpu-core/src/device/resource.rs | 2 +- wgpu-core/src/pipeline_cache.rs | 6 +++--- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index b48ff5def5..d20e664509 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -2345,12 +2345,9 @@ impl Global { return None; } if let Some(raw_cache) = cache.raw.as_ref() { - let vec = unsafe { cache.device.raw().pipeline_cache_get_data(raw_cache) }; - let Some(mut vec) = vec else { return None }; - let Some(validation_key) = cache.device.raw().pipeline_cache_validation_key() - else { - return None; - }; + let mut vec = unsafe { cache.device.raw().pipeline_cache_get_data(raw_cache) }?; + let 
validation_key = cache.device.raw().pipeline_cache_validation_key()?; + let mut header_contents = [0; pipeline_cache::HEADER_LENGTH]; pipeline_cache::add_cache_header( &mut header_contents, diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index ea3facb44f..34f7208ad3 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -3532,7 +3532,7 @@ impl Device { .zip(self.raw().pipeline_cache_validation_key()) { let data = pipeline_cache::validate_pipeline_cache( - &data, + data, &self.adapter.raw.info, validation_key, ); diff --git a/wgpu-core/src/pipeline_cache.rs b/wgpu-core/src/pipeline_cache.rs index 191c870c4c..4f7d07dadd 100644 --- a/wgpu-core/src/pipeline_cache.rs +++ b/wgpu-core/src/pipeline_cache.rs @@ -94,7 +94,7 @@ pub fn add_cache_header( validation_key: [u8; 16], ) { assert_eq!(in_region.len(), HEADER_LENGTH); - let data_hash = hash(&data); + let data_hash = hash(data); let header = PipelineCacheHeader { adapter_key: adapter_key(adapter) .expect("Called add_cache_header for an adapter which doesn't support cache data. 
This is a wgpu internal bug"), @@ -234,7 +234,7 @@ struct Reader<'a> { impl<'a> Reader<'a> { fn read_byte(&mut self) -> Option { - let res = *self.data.get(0)?; + let res = *self.data.first()?; self.total_read += 1; self.data = &self.data[1..]; Some(res) @@ -274,7 +274,7 @@ impl<'a> Writer<'a> { if N > self.data.len() { return None; } - let data = std::mem::replace(&mut self.data, &mut []); + let data = std::mem::take(&mut self.data); let (start, data) = data.split_at_mut(N); self.data = data; start.copy_from_slice(array); From 7ac7010cec4b3475ab77e084d54209d71e46a270 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Mon, 18 Mar 2024 15:42:14 +0000 Subject: [PATCH 17/31] Fix enforced truncation --- wgpu-core/src/device/global.rs | 2 +- wgpu-core/src/pipeline_cache.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index d20e664509..3a9ad0b8be 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -2356,7 +2356,7 @@ impl Global { validation_key, ); - let deleted = vec.splice(..1, header_contents).collect::>(); + let deleted = vec.splice(..0, header_contents).collect::>(); debug_assert!(deleted.is_empty()); return Some(vec); diff --git a/wgpu-core/src/pipeline_cache.rs b/wgpu-core/src/pipeline_cache.rs index 4f7d07dadd..47e97eb591 100644 --- a/wgpu-core/src/pipeline_cache.rs +++ b/wgpu-core/src/pipeline_cache.rs @@ -6,7 +6,7 @@ pub const HEADER_LENGTH: usize = std::mem::size_of::(); #[derive(Debug, PartialEq, Eq, Clone, Error)] #[non_exhaustive] pub enum PipelineCacheValidationError { - #[error("The pipeline cache data truncataed")] + #[error("The pipeline cache data was truncated")] Truncated, #[error("The pipeline cache data was longer than recorded")] // TODO: Is it plausible that this would happen From ca65d7e76626e30df6b3f832dc0d97bbefff6b8c Mon Sep 17 00:00:00 2001 From: Daniel McNab 
<36049421+DJMcNab@users.noreply.github.com> Date: Fri, 5 Apr 2024 15:18:29 +0100 Subject: [PATCH 18/31] Punt the hash validation We don't have any current dependencies which would compute this, and it is probably superfluous --- wgpu-core/src/pipeline_cache.rs | 42 ++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/wgpu-core/src/pipeline_cache.rs b/wgpu-core/src/pipeline_cache.rs index 47e97eb591..c46a53b4f6 100644 --- a/wgpu-core/src/pipeline_cache.rs +++ b/wgpu-core/src/pipeline_cache.rs @@ -80,8 +80,7 @@ pub fn validate_pipeline_cache<'d>( if remaining_data.len() > data_size { return Err(PipelineCacheValidationError::Extended); } - let hash = hash(remaining_data); - if header.data_hash != hash { + if header.hash_space != HASH_SPACE_VALUE { return Err(PipelineCacheValidationError::Corrupted); } Ok(remaining_data) @@ -94,7 +93,6 @@ pub fn add_cache_header( validation_key: [u8; 16], ) { assert_eq!(in_region.len(), HEADER_LENGTH); - let data_hash = hash(data); let header = PipelineCacheHeader { adapter_key: adapter_key(adapter) .expect("Called add_cache_header for an adapter which doesn't support cache data. This is a wgpu internal bug"), @@ -103,7 +101,7 @@ pub fn add_cache_header( magic: MAGIC, header_version: HEADER_VERSION, validation_key, - data_hash, + hash_space: HASH_SPACE_VALUE, data_size: data .len() .try_into() @@ -116,6 +114,17 @@ const MAGIC: [u8; 8] = *b"WGPUPLCH"; const HEADER_VERSION: u32 = 1; const ABI: u32 = std::mem::size_of::<*const ()>() as u32; +/// The value used to fill [`PipelineCacheHeader::hash_space`] +/// +/// If we receive reports of pipeline cache data corruption which is not otherwise caught +/// on a real device, it would be worth modifying this +/// +/// Note that wgpu does not protect against malicious writes to e.g. a file used +/// to store a pipeline cache. +/// That is the resonsibility of the end application, such as by using a +/// private space. 
+const HASH_SPACE_VALUE: u64 = 0xFEDCBA9_876543210; + #[repr(C)] #[derive(PartialEq, Eq)] struct PipelineCacheHeader { @@ -135,7 +144,7 @@ struct PipelineCacheHeader { cache_abi: u32, /// The id for the backend in use, from [wgt::Backend] backend: u8, - /// The hash key which identifiers the device/adapter. + /// The key which identifiers the device/adapter. /// This is used to validate that this pipeline cache (probably) was produced for /// the expected device. /// On Vulkan: it is a combination of vendor ID and device ID @@ -146,9 +155,13 @@ struct PipelineCacheHeader { validation_key: [u8; 16], /// The length of the data which is sent to/recieved from the backend data_size: u64, - /// The hash of the data which is sent to/recieved from the backend, and which - /// follows this header. That should be the remainder of the memory - data_hash: u64, + /// Space reserved for a hash of the data in future + /// + /// We assume that your cache storage system will be relatively robust, and so + /// do not validate this hash + /// + /// Therefore, this will always have a value of [`RESERVED_FOR_HASH`] + hash_space: u64, } impl PipelineCacheHeader { @@ -180,7 +193,7 @@ impl PipelineCacheHeader { adapter_key, validation_key, data_size, - data_hash, + hash_space: data_hash, }, reader.data, )) @@ -195,7 +208,7 @@ impl PipelineCacheHeader { writer.write_array(&self.adapter_key)?; writer.write_array(&self.validation_key)?; writer.write_u64(self.data_size)?; - writer.write_u64(self.data_hash)?; + writer.write_u64(self.hash_space)?; assert_eq!(writer.data.len(), 0); Some(()) @@ -218,15 +231,6 @@ fn adapter_key(adapter: &AdapterInfo) -> Result<[u8; 15], PipelineCacheValidatio } } -fn hash(data: &[u8]) -> u64 { - log::warn!( - "Using fake 'hash' for {} bytes of data. 
Data might become invalid", - data.len() - ); - // TODO: Actually do a proper hash - 0xFEDCBA9_876543210 -} - struct Reader<'a> { data: &'a [u8], total_read: usize, From c6f16695f1bde6952bb8312a1a1ad1c644181c91 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 19 Apr 2024 11:41:09 +0100 Subject: [PATCH 19/31] Move the cache to PipelineCompilationOptions --- deno_webgpu/pipeline.rs | 5 +- examples/src/boids/mod.rs | 1 - examples/src/bunnymark/mod.rs | 1 - examples/src/conservative_raster/mod.rs | 4 -- examples/src/cube/mod.rs | 2 - examples/src/hello_triangle/mod.rs | 1 - examples/src/mipmap/mod.rs | 2 - examples/src/msaa_line/mod.rs | 1 - examples/src/render_to_texture/mod.rs | 1 - examples/src/shadow/mod.rs | 2 - examples/src/skybox/mod.rs | 2 - examples/src/srgb_blend/mod.rs | 1 - examples/src/stencil_triangles/mod.rs | 2 - examples/src/texture_arrays/mod.rs | 1 - examples/src/timestamp_queries/mod.rs | 1 - examples/src/uniform_values/mod.rs | 1 - examples/src/water/mod.rs | 3 -- tests/tests/bgra8unorm_storage.rs | 1 - tests/tests/device.rs | 1 - tests/tests/mem_leaks.rs | 1 - tests/tests/nv12_texture/mod.rs | 1 - tests/tests/occlusion_query/mod.rs | 1 - tests/tests/regression/issue_3349.rs | 1 - tests/tests/regression/issue_3457.rs | 2 - tests/tests/scissor_tests/mod.rs | 1 - tests/tests/shader_primitive_index/mod.rs | 1 - tests/tests/shader_view_format/mod.rs | 1 - tests/tests/vertex_indices/mod.rs | 1 - wgpu-core/src/device/global.rs | 2 +- wgpu-core/src/device/resource.rs | 48 ++++++++++++------- wgpu-core/src/pipeline.rs | 6 +-- wgpu-hal/examples/halmark/main.rs | 3 +- wgpu-hal/examples/ray-traced-triangle/main.rs | 2 +- wgpu-hal/src/lib.rs | 14 ++++-- wgpu-hal/src/vulkan/device.rs | 13 +++-- wgpu/src/backend/wgpu_core.rs | 5 +- wgpu/src/lib.rs | 13 ++--- 37 files changed, 66 insertions(+), 83 deletions(-) diff --git a/deno_webgpu/pipeline.rs b/deno_webgpu/pipeline.rs index b4d2f8d36e..6ba3a3e516 100644 --- 
a/deno_webgpu/pipeline.rs +++ b/deno_webgpu/pipeline.rs @@ -114,8 +114,8 @@ pub fn op_webgpu_create_compute_pipeline( entry_point: compute.entry_point.map(Cow::from), constants: Cow::Owned(compute.constants), zero_initialize_workgroup_memory: true, + cache: None, }, - cache: None, }; let implicit_pipelines = match layout { GPUPipelineLayoutOrGPUAutoLayoutMode::Layout(_) => None, @@ -363,6 +363,7 @@ pub fn op_webgpu_create_render_pipeline( constants: Cow::Owned(fragment.constants), // Required to be true for WebGPU zero_initialize_workgroup_memory: true, + cache: None, }, targets: Cow::Owned(fragment.targets), }) @@ -388,6 +389,7 @@ pub fn op_webgpu_create_render_pipeline( constants: Cow::Owned(args.vertex.constants), // Required to be true for WebGPU zero_initialize_workgroup_memory: true, + cache: None, }, buffers: Cow::Owned(vertex_buffers), }, @@ -396,7 +398,6 @@ pub fn op_webgpu_create_render_pipeline( multisample: args.multisample, fragment, multiview: None, - cache: None, }; let implicit_pipelines = match args.layout { diff --git a/examples/src/boids/mod.rs b/examples/src/boids/mod.rs index 67c69d349b..6c8bb6e76c 100644 --- a/examples/src/boids/mod.rs +++ b/examples/src/boids/mod.rs @@ -156,7 +156,6 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); // create compute pipeline diff --git a/examples/src/bunnymark/mod.rs b/examples/src/bunnymark/mod.rs index b5b33b54d5..679fc5014a 100644 --- a/examples/src/bunnymark/mod.rs +++ b/examples/src/bunnymark/mod.rs @@ -224,7 +224,6 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); let texture = { diff --git a/examples/src/conservative_raster/mod.rs b/examples/src/conservative_raster/mod.rs index 116ed8623b..89500a798f 100644 --- a/examples/src/conservative_raster/mod.rs +++ b/examples/src/conservative_raster/mod.rs @@ 
-113,7 +113,6 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); let pipeline_triangle_regular = @@ -136,7 +135,6 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); let pipeline_lines = if device @@ -167,7 +165,6 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }), ) } else { @@ -227,7 +224,6 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }), bind_group_layout, ) diff --git a/examples/src/cube/mod.rs b/examples/src/cube/mod.rs index 9828157e57..9347627812 100644 --- a/examples/src/cube/mod.rs +++ b/examples/src/cube/mod.rs @@ -260,7 +260,6 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); let pipeline_wire = if device @@ -302,7 +301,6 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); Some(pipeline_wire) } else { diff --git a/examples/src/hello_triangle/mod.rs b/examples/src/hello_triangle/mod.rs index e4d42674f7..79162a6956 100644 --- a/examples/src/hello_triangle/mod.rs +++ b/examples/src/hello_triangle/mod.rs @@ -72,7 +72,6 @@ async fn run(event_loop: EventLoop<()>, window: Window) { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); let mut config = surface diff --git a/examples/src/mipmap/mod.rs b/examples/src/mipmap/mod.rs index eaed9c82e7..0848e94e10 100644 --- a/examples/src/mipmap/mod.rs +++ b/examples/src/mipmap/mod.rs @@ -109,7 +109,6 @@ impl Example { depth_stencil: None, multisample: 
wgpu::MultisampleState::default(), multiview: None, - cache: None, }); let bind_group_layout = pipeline.get_bind_group_layout(0); @@ -311,7 +310,6 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); // Create bind group diff --git a/examples/src/msaa_line/mod.rs b/examples/src/msaa_line/mod.rs index 46bb743e99..cd22e75bc4 100644 --- a/examples/src/msaa_line/mod.rs +++ b/examples/src/msaa_line/mod.rs @@ -78,7 +78,6 @@ impl Example { ..Default::default() }, multiview: None, - cache: None, }); let mut encoder = device.create_render_bundle_encoder(&wgpu::RenderBundleEncoderDescriptor { diff --git a/examples/src/render_to_texture/mod.rs b/examples/src/render_to_texture/mod.rs index caed736741..5e571dc74e 100644 --- a/examples/src/render_to_texture/mod.rs +++ b/examples/src/render_to_texture/mod.rs @@ -72,7 +72,6 @@ async fn run(_path: Option) { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); log::info!("Wgpu context set up."); diff --git a/examples/src/shadow/mod.rs b/examples/src/shadow/mod.rs index b2c27f5892..2cb6d6f3e2 100644 --- a/examples/src/shadow/mod.rs +++ b/examples/src/shadow/mod.rs @@ -526,7 +526,6 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); Pass { @@ -661,7 +660,6 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); Pass { diff --git a/examples/src/skybox/mod.rs b/examples/src/skybox/mod.rs index e526feedae..35a4266d20 100644 --- a/examples/src/skybox/mod.rs +++ b/examples/src/skybox/mod.rs @@ -221,7 +221,6 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); let entity_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { 
label: Some("Entity"), @@ -255,7 +254,6 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); let sampler = device.create_sampler(&wgpu::SamplerDescriptor { diff --git a/examples/src/srgb_blend/mod.rs b/examples/src/srgb_blend/mod.rs index 314fc92df2..f701aff989 100644 --- a/examples/src/srgb_blend/mod.rs +++ b/examples/src/srgb_blend/mod.rs @@ -151,7 +151,6 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); // Done diff --git a/examples/src/stencil_triangles/mod.rs b/examples/src/stencil_triangles/mod.rs index 8d638d20d1..e0f495177f 100644 --- a/examples/src/stencil_triangles/mod.rs +++ b/examples/src/stencil_triangles/mod.rs @@ -106,7 +106,6 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); let outer_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { @@ -142,7 +141,6 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); let stencil_buffer = device.create_texture(&wgpu::TextureDescriptor { diff --git a/examples/src/texture_arrays/mod.rs b/examples/src/texture_arrays/mod.rs index b0f474b957..dd7b4ec89a 100644 --- a/examples/src/texture_arrays/mod.rs +++ b/examples/src/texture_arrays/mod.rs @@ -341,7 +341,6 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None }); Self { diff --git a/examples/src/timestamp_queries/mod.rs b/examples/src/timestamp_queries/mod.rs index 0d8345ddfa..7a501637d4 100644 --- a/examples/src/timestamp_queries/mod.rs +++ b/examples/src/timestamp_queries/mod.rs @@ -366,7 +366,6 @@ fn render_pass( depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - 
cache: None, }); let render_target = device.create_texture(&wgpu::TextureDescriptor { label: Some("rendertarget"), diff --git a/examples/src/uniform_values/mod.rs b/examples/src/uniform_values/mod.rs index c53a189722..06780c8aef 100644 --- a/examples/src/uniform_values/mod.rs +++ b/examples/src/uniform_values/mod.rs @@ -192,7 +192,6 @@ impl WgpuContext { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); let surface_config = surface .get_default_config(&adapter, size.width, size.height) diff --git a/examples/src/water/mod.rs b/examples/src/water/mod.rs index 2aefa85c6b..94f12895a8 100644 --- a/examples/src/water/mod.rs +++ b/examples/src/water/mod.rs @@ -574,8 +574,6 @@ impl crate::framework::Example for Example { // No multisampling is used. multisample: wgpu::MultisampleState::default(), multiview: None, - // Pipeline caching is not used - cache: None, }); // Same idea as the water pipeline. @@ -612,7 +610,6 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None }); // A render bundle to draw the terrain. 
diff --git a/tests/tests/bgra8unorm_storage.rs b/tests/tests/bgra8unorm_storage.rs index 7bc117f097..17082a9ed4 100644 --- a/tests/tests/bgra8unorm_storage.rs +++ b/tests/tests/bgra8unorm_storage.rs @@ -98,7 +98,6 @@ static BGRA8_UNORM_STORAGE: GpuTestConfiguration = GpuTestConfiguration::new() entry_point: "main", compilation_options: Default::default(), module: &module, - cache: None, }); let mut encoder = diff --git a/tests/tests/device.rs b/tests/tests/device.rs index be3d3757ae..649a850fa9 100644 --- a/tests/tests/device.rs +++ b/tests/tests/device.rs @@ -488,7 +488,6 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne multisample: wgpu::MultisampleState::default(), fragment: None, multiview: None, - cache: None, }); }); diff --git a/tests/tests/mem_leaks.rs b/tests/tests/mem_leaks.rs index 3c59aec036..7002ebabe0 100644 --- a/tests/tests/mem_leaks.rs +++ b/tests/tests/mem_leaks.rs @@ -113,7 +113,6 @@ async fn draw_test_with_reports( })], }), multiview: None, - cache: None, }); let global_report = ctx.instance.generate_report().unwrap(); diff --git a/tests/tests/nv12_texture/mod.rs b/tests/tests/nv12_texture/mod.rs index fa386f8653..70ee849831 100644 --- a/tests/tests/nv12_texture/mod.rs +++ b/tests/tests/nv12_texture/mod.rs @@ -41,7 +41,6 @@ static NV12_TEXTURE_CREATION_SAMPLING: GpuTestConfiguration = GpuTestConfigurati depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); let tex = ctx.device.create_texture(&wgpu::TextureDescriptor { diff --git a/tests/tests/occlusion_query/mod.rs b/tests/tests/occlusion_query/mod.rs index a888320e28..1a68ecf79d 100644 --- a/tests/tests/occlusion_query/mod.rs +++ b/tests/tests/occlusion_query/mod.rs @@ -51,7 +51,6 @@ static OCCLUSION_QUERY: GpuTestConfiguration = GpuTestConfiguration::new() }), multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); // Create occlusion query set diff --git 
a/tests/tests/regression/issue_3349.rs b/tests/tests/regression/issue_3349.rs index 35d35e5bdf..74c466b45a 100644 --- a/tests/tests/regression/issue_3349.rs +++ b/tests/tests/regression/issue_3349.rs @@ -119,7 +119,6 @@ async fn multi_stage_data_binding_test(ctx: TestingContext) { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, }); let texture = ctx.device.create_texture(&wgpu::TextureDescriptor { diff --git a/tests/tests/regression/issue_3457.rs b/tests/tests/regression/issue_3457.rs index f0f7e64636..f18d681ae1 100644 --- a/tests/tests/regression/issue_3457.rs +++ b/tests/tests/regression/issue_3457.rs @@ -80,7 +80,6 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = })], }), multiview: None, - cache: None, }); let single_pipeline = ctx @@ -112,7 +111,6 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = })], }), multiview: None, - cache: None, }); let view = ctx diff --git a/tests/tests/scissor_tests/mod.rs b/tests/tests/scissor_tests/mod.rs index 3f1e7df135..15c35644e5 100644 --- a/tests/tests/scissor_tests/mod.rs +++ b/tests/tests/scissor_tests/mod.rs @@ -61,7 +61,6 @@ async fn scissor_test_impl( })], }), multiview: None, - cache: None, }); let readback_buffer = image::ReadbackBuffers::new(&ctx.device, &texture); diff --git a/tests/tests/shader_primitive_index/mod.rs b/tests/tests/shader_primitive_index/mod.rs index 9972f81aa1..fb43397830 100644 --- a/tests/tests/shader_primitive_index/mod.rs +++ b/tests/tests/shader_primitive_index/mod.rs @@ -147,7 +147,6 @@ async fn pulling_common( })], }), multiview: None, - cache: None, }); let width = 2; diff --git a/tests/tests/shader_view_format/mod.rs b/tests/tests/shader_view_format/mod.rs index d34b8d851d..53c642bf7a 100644 --- a/tests/tests/shader_view_format/mod.rs +++ b/tests/tests/shader_view_format/mod.rs @@ -109,7 +109,6 @@ async fn reinterpret( depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, - cache: None, 
}); let bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { layout: &pipeline.get_bind_group_layout(0), diff --git a/tests/tests/vertex_indices/mod.rs b/tests/tests/vertex_indices/mod.rs index 7bd172d850..cad7e731d1 100644 --- a/tests/tests/vertex_indices/mod.rs +++ b/tests/tests/vertex_indices/mod.rs @@ -295,7 +295,6 @@ async fn vertex_index_common(ctx: TestingContext) { })], }), multiview: None, - cache: None, }; let builtin_pipeline = ctx.device.create_render_pipeline(&pipeline_desc); pipeline_desc.vertex.entry_point = "vs_main_buffers"; diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 3a9ad0b8be..c8739ef89a 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -1863,7 +1863,7 @@ impl Global { let cache = unsafe { device.create_pipeline_cache(desc) }; match cache { Ok(cache) => { - let (id, _) = fid.assign(cache); + let (id, _) = fid.assign(Arc::new(cache)); api_log!("Device::create_pipeline_cache -> {id:?}"); return (id, None); } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 34f7208ad3..b09c51bd28 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -2823,7 +2823,7 @@ impl Device { Device::make_late_sized_buffer_groups(&shader_binding_sizes, &pipeline_layout); let cache = 'cache: { - let Some(cache) = desc.cache else { + let Some(cache) = desc.stage.cache else { break 'cache None; }; let Ok(cache) = hub.pipeline_caches.get(cache) else { @@ -2844,8 +2844,8 @@ impl Device { entry_point: final_entry_point_name.as_ref(), constants: desc.stage.constants.as_ref(), zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, + cache: cache.as_ref().and_then(|it| it.raw.as_ref()), }, - cache: cache.as_ref().and_then(|it| it.raw.as_ref()), }; let raw = unsafe { @@ -3219,6 +3219,7 @@ impl Device { let vertex_shader_module; let vertex_entry_point_name; + let vertex_cache; let vertex_stage = { let 
stage_desc = &desc.vertex.stage; let stage = wgt::ShaderStages::VERTEX; @@ -3256,16 +3257,31 @@ impl Device { validated_stages |= stage; } + vertex_cache = if let Some(cache) = stage_desc.cache { + if let Ok(cache) = hub.pipeline_caches.get(cache) { + if cache.device.as_info().id() != self.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + Some(cache) + } else { + None + } + } else { + None + }; + hal::ProgrammableStage { module: vertex_shader_module.raw(), entry_point: &vertex_entry_point_name, constants: stage_desc.constants.as_ref(), zero_initialize_workgroup_memory: stage_desc.zero_initialize_workgroup_memory, + cache: vertex_cache.as_ref().and_then(|it| it.raw.as_ref()), } }; let mut fragment_shader_module = None; let fragment_entry_point_name; + let fragment_cache; let fragment_stage = match desc.fragment { Some(ref fragment_state) => { let stage = wgt::ShaderStages::FRAGMENT; @@ -3317,6 +3333,19 @@ impl Device { })?; } + fragment_cache = if let Some(cache) = fragment_state.stage.cache { + if let Ok(cache) = hub.pipeline_caches.get(cache) { + if cache.device.as_info().id() != self.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + Some(cache) + } else { + None + } + } else { + None + }; + Some(hal::ProgrammableStage { module: shader_module.raw(), entry_point: &fragment_entry_point_name, @@ -3324,6 +3353,7 @@ impl Device { zero_initialize_workgroup_memory: fragment_state .stage .zero_initialize_workgroup_memory, + cache: fragment_cache.as_ref().and_then(|it| it.raw.as_ref()), }) } None => None, @@ -3410,19 +3440,6 @@ impl Device { } } - let cache = if let Some(cache) = desc.cache { - if let Ok(cache) = hub.pipeline_caches.get(cache) { - if cache.device.as_info().id() != self.as_info().id() { - return Err(DeviceError::WrongDevice.into()); - } - Some(cache) - } else { - None - } - } else { - None - }; - let late_sized_buffer_groups = Device::make_late_sized_buffer_groups(&shader_binding_sizes, &pipeline_layout); @@ -3437,7 
+3454,6 @@ impl Device { fragment_stage, color_targets, multiview: desc.multiview, - cache: cache.as_ref().and_then(|it| it.raw.as_ref()), }; let raw = unsafe { self.raw diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index d02915fa64..4b7f402662 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -166,6 +166,8 @@ pub struct ProgrammableStageDescriptor<'a> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, + /// The pipeline cache to use when creating this pipeline. + pub cache: Option, } /// Number of implicit bind groups derived at pipeline creation. @@ -193,8 +195,6 @@ pub struct ComputePipelineDescriptor<'a> { pub layout: Option, /// The compiled compute stage and its entry point. pub stage: ProgrammableStageDescriptor<'a>, - /// The pipeline cache to use when creating this pipeline. - pub cache: Option, } #[derive(Clone, Debug, Error)] @@ -380,8 +380,6 @@ pub struct RenderPipelineDescriptor<'a> { /// If the pipeline will be used with a multiview render pass, this indicates how many array /// layers the attachments will have. pub multiview: Option, - /// The pipeline cache to use when creating this pipeline. 
- pub cache: Option, } #[derive(Clone, Debug)] diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index ee59fa2590..f91c1d5778 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -254,6 +254,7 @@ impl Example { entry_point: "vs_main", constants: &constants, zero_initialize_workgroup_memory: true, + cache: None, }, vertex_buffers: &[], fragment_stage: Some(hal::ProgrammableStage { @@ -261,6 +262,7 @@ impl Example { entry_point: "fs_main", constants: &constants, zero_initialize_workgroup_memory: true, + cache: None, }), primitive: wgt::PrimitiveState { topology: wgt::PrimitiveTopology::TriangleStrip, @@ -274,7 +276,6 @@ impl Example { write_mask: wgt::ColorWrites::default(), })], multiview: None, - cache: None, }; let pipeline = unsafe { device.create_render_pipeline(&pipeline_desc).unwrap() }; diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 8f404dc4d2..f6806bb132 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -373,8 +373,8 @@ impl Example { entry_point: "main", constants: &Default::default(), zero_initialize_workgroup_memory: true, + cache: None, }, - cache: None, }) } .unwrap(); diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 770147edfb..cc68efdbb2 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -621,11 +621,11 @@ pub trait Device: WasmNotSendSync { unsafe fn create_pipeline_cache( &self, desc: &PipelineCacheDescriptor<'_>, - ) -> Result; + ) -> Result<::PipelineCache, PipelineCacheError>; fn pipeline_cache_validation_key(&self) -> Option<[u8; 16]> { None } - unsafe fn destroy_pipeline_cache(&self, cache: A::PipelineCache); + unsafe fn destroy_pipeline_cache(&self, cache: ::PipelineCache); unsafe fn create_query_set( &self, @@ -668,7 +668,10 @@ pub trait Device: WasmNotSendSync { unsafe fn stop_capture(&self); #[allow(unused_variables)] - 
unsafe fn pipeline_cache_get_data(&self, cache: &A::PipelineCache) -> Option> { + unsafe fn pipeline_cache_get_data( + &self, + cache: &::PipelineCache, + ) -> Option> { None } @@ -1634,6 +1637,8 @@ pub struct ProgrammableStage<'a, A: Api> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, + /// The cache which will be used and filled when compiling this pipeline + pub cache: Option<&'a A::PipelineCache>, } // Rust gets confused about the impl requirements for `A` @@ -1644,6 +1649,7 @@ impl Clone for ProgrammableStage<'_, A> { entry_point: self.entry_point, constants: self.constants, zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, + cache: self.cache, } } } @@ -1656,7 +1662,6 @@ pub struct ComputePipelineDescriptor<'a, A: Api> { pub layout: &'a A::PipelineLayout, /// The compiled compute stage and its entry point. pub stage: ProgrammableStage<'a, A>, - pub cache: Option<&'a A::PipelineCache>, } pub struct PipelineCacheDescriptor<'a> { @@ -1698,7 +1703,6 @@ pub struct RenderPipelineDescriptor<'a, A: Api> { /// If the pipeline will be used with a multiview render pass, this indicates how many array /// layers the attachments will have. pub multiview: Option, - pub cache: Option<&'a A::PipelineCache>, } #[derive(Debug, Clone)] diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 4dea6fe4d9..bb06de298b 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1863,9 +1863,15 @@ impl crate::Device for super::Device { .build() }]; - let pipeline_cache = desc - .cache - .map(|it| it.raw) + let vertex_cache = desc.vertex_stage.cache.map(|it| it.raw); + let fragment_cache = desc + .fragment_stage + .as_ref() + .and_then(|it| it.cache) + .map(|it| it.raw); + // TODO: What should the behaviour be when both are set and different? 
+ let pipeline_cache = vertex_cache + .or(fragment_cache) .unwrap_or(vk::PipelineCache::null()); let mut raw_vec = { @@ -1921,6 +1927,7 @@ impl crate::Device for super::Device { }]; let pipeline_cache = desc + .stage .cache .map(|it| it.raw) .unwrap_or(vk::PipelineCache::null()); diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index f0e647da6c..ca925257e9 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -1167,6 +1167,7 @@ impl crate::Context for ContextWgpuCore { .vertex .compilation_options .zero_initialize_workgroup_memory, + cache: desc.vertex.compilation_options.cache.map(|c| c.id.into()), }, buffers: Borrowed(&vertex_buffers), }, @@ -1181,11 +1182,11 @@ impl crate::Context for ContextWgpuCore { zero_initialize_workgroup_memory: frag .compilation_options .zero_initialize_workgroup_memory, + cache: frag.compilation_options.cache.map(|c| c.id.into()), }, targets: Borrowed(frag.targets), }), multiview: desc.multiview, - cache: desc.cache.map(|c| c.id.into()), }; let (id, error) = wgc::gfx_select!(device => self.0.device_create_render_pipeline( @@ -1234,8 +1235,8 @@ impl crate::Context for ContextWgpuCore { zero_initialize_workgroup_memory: desc .compilation_options .zero_initialize_workgroup_memory, + cache: desc.compilation_options.cache.map(|c| c.id.into()), }, - cache: desc.cache.map(|c| c.id.into()), }; let (id, error) = wgc::gfx_select!(device => self.0.device_create_compute_pipeline( diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index d873352286..226dd10189 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1933,8 +1933,6 @@ pub struct RenderPipelineDescriptor<'a> { /// If the pipeline will be used with a multiview render pass, this indicates how many array /// layers the attachments will have. pub multiview: Option, - /// The pipeline cache to use when creating this pipeline. 
- pub cache: Option<&'a PipelineCache>, } #[cfg(send_sync)] static_assertions::assert_impl_all!(RenderPipelineDescriptor<'_>: Send, Sync); @@ -1994,6 +1992,8 @@ pub struct PipelineCompilationOptions<'a> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, + /// The pipeline cache to use when creating this pipeline. + pub cache: Option<&'a PipelineCache>, } impl<'a> Default for PipelineCompilationOptions<'a> { @@ -2007,6 +2007,7 @@ impl<'a> Default for PipelineCompilationOptions<'a> { Self { constants, zero_initialize_workgroup_memory: true, + cache: None, } } } @@ -2028,8 +2029,6 @@ pub struct ComputePipelineDescriptor<'a> { /// The name of the entry point in the compiled shader. There must be a function with this name /// and no return value in the shader. pub entry_point: &'a str, - /// The pipeline cache to use when creating this pipeline. - pub cache: Option<&'a PipelineCache>, /// Advanced options for when this pipeline is compiled /// /// This implements `Default`, and for most users can be set to `Default::default()` @@ -3223,12 +3222,6 @@ impl Device { DynContext::device_make_invalid(&*self.context, &self.id, self.data.as_ref()) } - /// Test-only function to make this device invalid. 
- #[doc(hidden)] - pub fn make_invalid(&self) { - DynContext::device_make_invalid(&*self.context, &self.id, self.data.as_ref()) - } - /// Create a [`PipelineCache`] with initial data /// /// This can be passed to [`Device::create_compute_pipeline`] From d46ccd34e8cb1431c2b8d3307486da93a1dc3ab4 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 19 Apr 2024 11:53:52 +0100 Subject: [PATCH 20/31] Address some review comments --- wgpu-hal/src/vulkan/device.rs | 1 - wgpu/src/backend/wgpu_core.rs | 35 -------------------------------- wgpu/src/context.rs | 25 ----------------------- wgpu/src/lib.rs | 38 +++++------------------------------ 4 files changed, 5 insertions(+), 94 deletions(-) diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index bb06de298b..ec91167822 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1965,7 +1965,6 @@ impl crate::Device for super::Device { desc: &crate::PipelineCacheDescriptor<'_>, ) -> Result { let mut info = vk::PipelineCacheCreateInfo::builder(); - // TODO: Add additional validation to the data, as described in https://medium.com/@zeuxcg/creating-a-robust-pipeline-cache-with-vulkan-961d09416cda if let Some(data) = desc.data { info = info.initial_data(data) } diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index ca925257e9..b8b8d8fd88 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -1296,41 +1296,6 @@ impl crate::Context for ContextWgpuCore { (id, ()) } - fn device_create_pipeline_cache( - &self, - device: &Self::DeviceId, - // TODO: Will be used for error handling - device_data: &Self::DeviceData, - desc: &crate::PipelineCacheDescriptor<'_>, - ) -> (Self::PipelineCacheId, Self::PipelineCacheData) { - use wgc::pipeline as pipe; - - let descriptor = pipe::PipelineCacheDescriptor { - label: desc.label.map(Borrowed), - data: None, - // if data is `None`, fallback won't be used - 
fallback: false, - }; - // Safety: data is None, so no safety concerns - let (id, error) = unsafe { - wgc::gfx_select!(device => self.0.device_create_pipeline_cache( - *device, - &descriptor, - None - )) - }; - if let Some(cause) = error { - self.handle_error( - &device_data.error_sink, - cause, - LABEL, - desc.label, - "Device::device_create_pipeline_cache_init", - ); - } - (id, ()) - } - fn device_create_buffer( &self, device: &Self::DeviceId, diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index 19ccfaeb96..ea664c2327 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -242,12 +242,6 @@ pub trait Context: Debug + WasmNotSendSync + Sized { device_data: &Self::DeviceData, desc: &PipelineCacheInitDescriptor<'_>, ) -> (Self::PipelineCacheId, Self::PipelineCacheData); - fn device_create_pipeline_cache( - &self, - device: &Self::DeviceId, - device_data: &Self::DeviceData, - desc: &PipelineCacheDescriptor<'_>, - ) -> (Self::PipelineCacheId, Self::PipelineCacheData); fn device_create_buffer( &self, device: &Self::DeviceId, @@ -1303,12 +1297,6 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { device_data: &crate::Data, desc: &PipelineCacheInitDescriptor<'_>, ) -> (ObjectId, Box); - fn device_create_pipeline_cache( - &self, - device: &ObjectId, - device_data: &crate::Data, - desc: &PipelineCacheDescriptor<'_>, - ) -> (ObjectId, Box); fn device_create_buffer( &self, device: &ObjectId, @@ -2355,19 +2343,6 @@ where (pipeline_cache.into(), Box::new(data) as _) } - fn device_create_pipeline_cache( - &self, - device: &ObjectId, - device_data: &crate::Data, - desc: &PipelineCacheDescriptor<'_>, - ) -> (ObjectId, Box) { - let device = ::from(*device); - let device_data = downcast_ref(device_data); - let (pipeline_cache, data) = - Context::device_create_pipeline_cache(self, &device, device_data, desc); - (pipeline_cache.into(), Box::new(data) as _) - } - fn device_create_buffer( &self, device: &ObjectId, diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 
226dd10189..d70427e633 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -3231,13 +3231,13 @@ impl Device { /// # Safety /// /// The `data` field of `desc` must have previously been returned from a call - /// to [`PipelineCache::get_data`][^saving]. It's recommended to only `data` for the same - /// [`util::pipeline_cache_key`], but this isn't a safety requirement. - /// This is also compatible across wgpu versions, as any data format change will + /// to [`PipelineCache::get_data`][^saving]. This `data` will only be used if it came + /// from an adapter with the same [`util::pipeline_cache_key`]. + /// This *is* compatible across wgpu versions, as any data format change will /// be accounted for. /// - /// Note that this means it is *not* supported to bring caches from previous - /// direct uses of backend APIs into this method. + /// It is *not* supported to bring caches from previous direct uses of backend APIs + /// into this method. /// /// # Errors /// @@ -3277,34 +3277,6 @@ impl Device { data, } } - - /// Create a pipeline cache without initial data - /// - /// This can be passed to [`Device::create_compute_pipeline`] - /// and [`Device::create_render_pipeline`] to initialise its cache data - /// - /// # Errors - /// - /// Errors if: - /// * the [`PIPELINE_CACHE`](wgt::Features::PIPELINE_CACHE) feature is not enabled - /// * this device is invalid; or - /// * the device is out of memory - /// - /// If the error handler didn't panic,and an error value is used in - /// subsequent calls, default (driver-provided) caching will be used. 
- pub fn create_pipeline_cache(&self, desc: &PipelineCacheDescriptor<'_>) -> PipelineCache { - let (id, data) = DynContext::device_create_pipeline_cache( - &*self.context, - &self.id, - self.data.as_ref(), - desc, - ); - PipelineCache { - context: Arc::clone(&self.context), - id, - data, - } - } } impl Drop for Device { From e046f9bdea53f7c80bf2203c4307e83d57e69fea Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 19 Apr 2024 15:43:52 +0100 Subject: [PATCH 21/31] Combine the init and non-init paths --- wgpu-types/src/lib.rs | 2 +- wgpu/src/backend/wgpu_core.rs | 8 +++---- wgpu/src/context.rs | 14 ++++++------ wgpu/src/lib.rs | 42 +++++++++++++---------------------- 4 files changed, 27 insertions(+), 39 deletions(-) diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index e81617451c..63af7c8aae 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -918,7 +918,7 @@ bitflags::bitflags! { /// /// Supported platforms: /// - Vulkan - const PIPELINE_CACHE = 1 << 56; + const PIPELINE_CACHE = 1 << 59; } } diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index b8b8d8fd88..413015ef0f 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -4,7 +4,7 @@ use crate::{ BufferDescriptor, CommandEncoderDescriptor, CompilationInfo, CompilationMessage, CompilationMessageType, ComputePassDescriptor, ComputePipelineDescriptor, DownlevelCapabilities, Features, Label, Limits, LoadOp, MapMode, Operations, - PipelineCacheInitDescriptor, PipelineLayoutDescriptor, RenderBundleEncoderDescriptor, + PipelineCacheDescriptor, PipelineLayoutDescriptor, RenderBundleEncoderDescriptor, RenderPipelineDescriptor, SamplerDescriptor, ShaderModuleDescriptor, ShaderModuleDescriptorSpirV, ShaderSource, StoreOp, SurfaceStatus, SurfaceTargetUnsafe, TextureDescriptor, TextureViewDescriptor, UncapturedErrorHandler, @@ -1265,18 +1265,18 @@ impl crate::Context for ContextWgpuCore { (id, ()) } - 
unsafe fn device_create_pipeline_cache_init( + unsafe fn device_create_pipeline_cache( &self, device: &Self::DeviceId, // TODO: Will be used for error handling device_data: &Self::DeviceData, - desc: &PipelineCacheInitDescriptor<'_>, + desc: &PipelineCacheDescriptor<'_>, ) -> (Self::PipelineCacheId, Self::PipelineCacheData) { use wgc::pipeline as pipe; let descriptor = pipe::PipelineCacheDescriptor { label: desc.label.map(Borrowed), - data: Some(desc.data.into()), + data: desc.data.map(Borrowed), fallback: desc.fallback, }; let (id, error) = wgc::gfx_select!(device => self.0.device_create_pipeline_cache( diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index ea664c2327..a85038cfa3 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -236,11 +236,11 @@ pub trait Context: Debug + WasmNotSendSync + Sized { device_data: &Self::DeviceData, desc: &ComputePipelineDescriptor<'_>, ) -> (Self::ComputePipelineId, Self::ComputePipelineData); - unsafe fn device_create_pipeline_cache_init( + unsafe fn device_create_pipeline_cache( &self, device: &Self::DeviceId, device_data: &Self::DeviceData, - desc: &PipelineCacheInitDescriptor<'_>, + desc: &PipelineCacheDescriptor<'_>, ) -> (Self::PipelineCacheId, Self::PipelineCacheData); fn device_create_buffer( &self, @@ -1291,11 +1291,11 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync { device_data: &crate::Data, desc: &ComputePipelineDescriptor<'_>, ) -> (ObjectId, Box); - unsafe fn device_create_pipeline_cache_init( + unsafe fn device_create_pipeline_cache( &self, device: &ObjectId, device_data: &crate::Data, - desc: &PipelineCacheInitDescriptor<'_>, + desc: &PipelineCacheDescriptor<'_>, ) -> (ObjectId, Box); fn device_create_buffer( &self, @@ -2330,16 +2330,16 @@ where (compute_pipeline.into(), Box::new(data) as _) } - unsafe fn device_create_pipeline_cache_init( + unsafe fn device_create_pipeline_cache( &self, device: &ObjectId, device_data: &crate::Data, - desc: &PipelineCacheInitDescriptor<'_>, + desc: 
&PipelineCacheDescriptor<'_>, ) -> (ObjectId, Box) { let device = ::from(*device); let device_data = downcast_ref(device_data); let (pipeline_cache, data) = - unsafe { Context::device_create_pipeline_cache_init(self, &device, device_data, desc) }; + unsafe { Context::device_create_pipeline_cache(self, &device, device_data, desc) }; (pipeline_cache.into(), Box::new(data) as _) } diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index d70427e633..a1e350b3d9 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1185,10 +1185,9 @@ static_assertions::assert_impl_all!(PipelineCache: Send, Sync); impl PipelineCache { /// Get the data associated with this pipeline cache. - /// - /// The data format may be `wgpu` specific, and should therefore only be - /// passed to a call to [`Device::create_pipeline_cache_init`] for a - /// compatible device. + /// The data format is an implementation detail of `wgpu`. + /// The only defined operation on this data setting it as the `data` field + /// on [`PipelineCacheDescriptor`], then to [`Device::create_pipeline_cache`]. /// /// This function is unique to the Rust API of `wgpu`. pub fn get_data(&self) -> Option> { @@ -2037,21 +2036,23 @@ pub struct ComputePipelineDescriptor<'a> { #[cfg(send_sync)] static_assertions::assert_impl_all!(ComputePipelineDescriptor<'_>: Send, Sync); -/// Describes a pipeline cache which reuses data from a previous run. +/// Describes a pipeline cache, which allows reusing compilation work +/// between program runs. /// -/// For use with [`Device::create_pipeline_cache_init`]. +/// For use with [`Device::create_pipeline_cache`] /// /// This type is unique to the Rust API of `wgpu`. #[derive(Clone, Debug)] -pub struct PipelineCacheInitDescriptor<'a> { +pub struct PipelineCacheDescriptor<'a> { /// Debug label of the pipeline cache. 
This might show up in some logs from `wgpu` pub label: Label<'a>, - /// The data used to initialise the cache initialise the cache using + /// The data used to initialise the cache initialise /// /// # Safety + /// /// This data must have been provided from a previous call to - /// [`PipelineCache::get_data`] - pub data: &'a [u8], + /// [`PipelineCache::get_data`], if not `None` + pub data: Option<&'a [u8]>, /// Whether to create a cache without data when the provided data /// is invalid. /// @@ -2059,19 +2060,6 @@ pub struct PipelineCacheInitDescriptor<'a> { pub fallback: bool, } #[cfg(send_sync)] -static_assertions::assert_impl_all!(PipelineCacheInitDescriptor<'_>: Send, Sync); - -/// Describes a pipeline cache when -/// -/// For use with [`Device::create_pipeline_cache`]. -/// -/// This type is unique to the Rust API of `wgpu`. -#[derive(Clone, Debug)] -pub struct PipelineCacheDescriptor<'a> { - /// Debug label of the pipeline cache. This might show up in some logs from `wgpu` - pub label: Label<'a>, -} -#[cfg(send_sync)] static_assertions::assert_impl_all!(PipelineCacheDescriptor<'_>: Send, Sync); pub use wgt::ImageCopyBuffer as ImageCopyBufferBase; @@ -3230,7 +3218,7 @@ impl Device { /// /// # Safety /// - /// The `data` field of `desc` must have previously been returned from a call + /// If the `data` field of `desc` is set, it must have previously been returned from a call /// to [`PipelineCache::get_data`][^saving]. This `data` will only be used if it came /// from an adapter with the same [`util::pipeline_cache_key`]. /// This *is* compatible across wgpu versions, as any data format change will @@ -3259,12 +3247,12 @@ impl Device { /// version of wgpu; or was created for an incompatible adapter, or there was a GPU driver /// update. In some cases, the data might not be used and a real value is returned, /// this is left to the discretion of GPU drivers. 
- pub unsafe fn create_pipeline_cache_init( + pub unsafe fn create_pipeline_cache( &self, - desc: &PipelineCacheInitDescriptor<'_>, + desc: &PipelineCacheDescriptor<'_>, ) -> PipelineCache { let (id, data) = unsafe { - DynContext::device_create_pipeline_cache_init( + DynContext::device_create_pipeline_cache( &*self.context, &self.id, self.data.as_ref(), From b470541daa89c88ba1680012cecde9b863f99a2c Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 19 Apr 2024 18:12:44 +0100 Subject: [PATCH 22/31] Fix more review comments and add a test --- tests/tests/pipeline_cache.rs | 203 +++++++++++++++++++++++++++++++ tests/tests/root.rs | 1 + wgpu-core/src/device/resource.rs | 42 ++++--- wgpu/src/backend/wgpu_core.rs | 1 - 4 files changed, 226 insertions(+), 21 deletions(-) create mode 100644 tests/tests/pipeline_cache.rs diff --git a/tests/tests/pipeline_cache.rs b/tests/tests/pipeline_cache.rs new file mode 100644 index 0000000000..9c9f6da578 --- /dev/null +++ b/tests/tests/pipeline_cache.rs @@ -0,0 +1,203 @@ +use std::num::NonZeroU64; + +use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext}; + +/// We want to test that partial updates to push constants work as expected. +/// +/// As such, we dispatch two compute passes, one which writes the values +/// before a partial update, and one which writes the values after the partial update. +/// +/// If the update code is working correctly, the values not written to by the second update +/// will remain unchanged. +#[gpu_test] +static PIPELINE_CACHE: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .test_features_limits() + .features(wgpu::Features::PIPELINE_CACHE) + .skip(wgpu_test::FailureCase::adapter("llvmpipe")), + ) + .run_async(pipeline_cache_test); + +/// Set to a higher value if adding a timing based assertion. 
This is otherwise fast to compile +const ARRAY_SIZE: u64 = 256; + +/// Create a shader which should be slow-ish to compile +fn shader() -> String { + format!( + r#" + @group(0) @binding(0) + var output: array; + + @compute @workgroup_size(1) + fn main() {{ + {} + }} + "#, + (0..ARRAY_SIZE) + // "Safety": There will only be a single workgroup, and a single thread in that workgroup + .map(|v| format!(" output[{v}] = {v}u;\n")) + .collect::() + ) +} + +async fn pipeline_cache_test(ctx: TestingContext) { + let shader = shader(); + let sm = ctx + .device + .create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("shader"), + source: wgpu::ShaderSource::Wgsl(shader.into()), + }); + + let bgl = ctx + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("bind_group_layout"), + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: NonZeroU64::new(ARRAY_SIZE * 4), + }, + count: None, + }], + }); + + let gpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: Some("gpu_buffer"), + size: ARRAY_SIZE * 4, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + + let cpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: Some("cpu_buffer"), + size: ARRAY_SIZE * 4, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + + let bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("bind_group"), + layout: &bgl, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: gpu_buffer.as_entire_binding(), + }], + }); + + let pipeline_layout = ctx + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("pipeline_layout"), + bind_group_layouts: &[&bgl], + 
push_constant_ranges: &[], + }); + + let first_cache_data; + let first_pipeline_duration; + { + let first_cache = unsafe { + ctx.device + .create_pipeline_cache(&wgpu::PipelineCacheDescriptor { + label: Some("pipeline_cache"), + data: None, + fallback: false, + }) + }; + let start = std::time::Instant::now(); + let first_pipeline = ctx + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("pipeline"), + layout: Some(&pipeline_layout), + module: &sm, + entry_point: "main", + compilation_options: wgpu::PipelineCompilationOptions { + cache: Some(&first_cache), + ..Default::default() + }, + }); + first_pipeline_duration = start.elapsed(); + validate_pipeline(&ctx, first_pipeline, &bind_group, &gpu_buffer, &cpu_buffer).await; + first_cache_data = first_cache.get_data(); + } + assert!(first_cache_data.is_some()); + + let second_cache = unsafe { + ctx.device + .create_pipeline_cache(&wgpu::PipelineCacheDescriptor { + label: Some("pipeline_cache"), + data: first_cache_data.as_deref(), + fallback: false, + }) + }; + let start = std::time::Instant::now(); + let first_pipeline = ctx + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("pipeline"), + layout: Some(&pipeline_layout), + module: &sm, + entry_point: "main", + compilation_options: wgpu::PipelineCompilationOptions { + cache: Some(&second_cache), + ..Default::default() + }, + }); + let duration = start.elapsed(); + validate_pipeline(&ctx, first_pipeline, &bind_group, &gpu_buffer, &cpu_buffer).await; + if false { + // Ideally, we could make this assertion. However, that doesn't actually work, because drivers have + // their own internal caches. 
This does work on my machine if I set `MESA_DISABLE_PIPELINE_CACHE=1` + // before running the test; but of course that is not a realistic scenario + assert!(duration.as_millis() < first_pipeline_duration.as_millis()); + } +} + +async fn validate_pipeline( + ctx: &TestingContext, + pipeline: wgpu::ComputePipeline, + bind_group: &wgpu::BindGroup, + gpu_buffer: &wgpu::Buffer, + cpu_buffer: &wgpu::Buffer, +) { + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("encoder"), + }); + + { + let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some("compute_pass"), + timestamp_writes: None, + }); + cpass.set_pipeline(&pipeline); + cpass.set_bind_group(0, &bind_group, &[]); + + // -- Dispatch 0 -- + cpass.dispatch_workgroups(1, 1, 1); + } + + encoder.copy_buffer_to_buffer(&gpu_buffer, 0, &cpu_buffer, 0, ARRAY_SIZE * 4); + ctx.queue.submit([encoder.finish()]); + cpu_buffer.slice(..).map_async(wgpu::MapMode::Read, |_| ()); + ctx.async_poll(wgpu::Maintain::wait()) + .await + .panic_on_timeout(); + + let data = cpu_buffer.slice(..).get_mapped_range(); + + let arrays: &[u32] = bytemuck::cast_slice(&data); + + assert_eq!(arrays.len(), ARRAY_SIZE as usize); + for (idx, value) in arrays.iter().copied().enumerate() { + assert_eq!(value as usize, idx); + } + drop(data); + cpu_buffer.unmap(); +} diff --git a/tests/tests/root.rs b/tests/tests/root.rs index 6dc7af56ec..cfdc97be76 100644 --- a/tests/tests/root.rs +++ b/tests/tests/root.rs @@ -23,6 +23,7 @@ mod nv12_texture; mod occlusion_query; mod partially_bounded_arrays; mod pipeline; +mod pipeline_cache; mod poll; mod push_constants; mod query_set; diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index b09c51bd28..08b495d3fd 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -3257,17 +3257,18 @@ impl Device { validated_stages |= stage; } - vertex_cache = if let Some(cache) = 
stage_desc.cache { - if let Ok(cache) = hub.pipeline_caches.get(cache) { - if cache.device.as_info().id() != self.as_info().id() { - return Err(DeviceError::WrongDevice.into()); - } - Some(cache) - } else { - None + vertex_cache = 'cache: { + let Some(cache) = stage_desc.cache else { + break 'cache None; + }; + let Ok(cache) = hub.pipeline_caches.get(cache) else { + break 'cache None; + }; + + if cache.device.as_info().id() != self.as_info().id() { + return Err(DeviceError::WrongDevice.into()); } - } else { - None + Some(cache) }; hal::ProgrammableStage { @@ -3333,17 +3334,18 @@ impl Device { })?; } - fragment_cache = if let Some(cache) = fragment_state.stage.cache { - if let Ok(cache) = hub.pipeline_caches.get(cache) { - if cache.device.as_info().id() != self.as_info().id() { - return Err(DeviceError::WrongDevice.into()); - } - Some(cache) - } else { - None + fragment_cache = 'cache: { + let Some(cache) = fragment_state.stage.cache else { + break 'cache None; + }; + let Ok(cache) = hub.pipeline_caches.get(cache) else { + break 'cache None; + }; + + if cache.device.as_info().id() != self.as_info().id() { + return Err(DeviceError::WrongDevice.into()); } - } else { - None + Some(cache) }; Some(hal::ProgrammableStage { diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 413015ef0f..ee113d3fc8 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -1268,7 +1268,6 @@ impl crate::Context for ContextWgpuCore { unsafe fn device_create_pipeline_cache( &self, device: &Self::DeviceId, - // TODO: Will be used for error handling device_data: &Self::DeviceData, desc: &PipelineCacheDescriptor<'_>, ) -> (Self::PipelineCacheId, Self::PipelineCacheData) { From 3daf8ebaf3112eb8f76bd93b4ce83398c46ec6f5 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 19 Apr 2024 18:15:56 +0100 Subject: [PATCH 23/31] Fix incorrect intra-doc-link --- wgpu-core/src/pipeline_cache.rs | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/wgpu-core/src/pipeline_cache.rs b/wgpu-core/src/pipeline_cache.rs index c46a53b4f6..d098cdafcf 100644 --- a/wgpu-core/src/pipeline_cache.rs +++ b/wgpu-core/src/pipeline_cache.rs @@ -160,7 +160,7 @@ struct PipelineCacheHeader { /// We assume that your cache storage system will be relatively robust, and so /// do not validate this hash /// - /// Therefore, this will always have a value of [`RESERVED_FOR_HASH`] + /// Therefore, this will always have a value of [`HASH_SPACE_VALUE`] hash_space: u64, } From 130cb0a775e0b07d01bfa193a76cdece0da00f18 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 19 Apr 2024 18:22:07 +0100 Subject: [PATCH 24/31] =?UTF-8?q?=F0=9F=93=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/tests/pipeline_cache.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/tests/pipeline_cache.rs b/tests/tests/pipeline_cache.rs index 9c9f6da578..13b3e5b52c 100644 --- a/tests/tests/pipeline_cache.rs +++ b/tests/tests/pipeline_cache.rs @@ -1,4 +1,4 @@ -use std::num::NonZeroU64; +use std::{fmt::Write, num::NonZeroU64}; use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext}; @@ -34,10 +34,11 @@ fn shader() -> String { {} }} "#, - (0..ARRAY_SIZE) + (0..ARRAY_SIZE).fold(String::new(), |mut s, v| { // "Safety": There will only be a single workgroup, and a single thread in that workgroup - .map(|v| format!(" output[{v}] = {v}u;\n")) - .collect::() + writeln!(s, " output[{v}] = {v}u;").expect("String"); + s + }) ) } @@ -177,13 +178,13 @@ async fn validate_pipeline( timestamp_writes: None, }); cpass.set_pipeline(&pipeline); - cpass.set_bind_group(0, &bind_group, &[]); + cpass.set_bind_group(0, bind_group, &[]); // -- Dispatch 0 -- cpass.dispatch_workgroups(1, 1, 1); } - encoder.copy_buffer_to_buffer(&gpu_buffer, 0, &cpu_buffer, 0, ARRAY_SIZE * 4); + 
encoder.copy_buffer_to_buffer(gpu_buffer, 0, cpu_buffer, 0, ARRAY_SIZE * 4); ctx.queue.submit([encoder.finish()]); cpu_buffer.slice(..).map_async(wgpu::MapMode::Read, |_| ()); ctx.async_poll(wgpu::Maintain::wait()) From 20906e994094f5de15dcf7fc572dd7c013f4594c Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 19 Apr 2024 18:32:58 +0100 Subject: [PATCH 25/31] Fix webgpu compile --- wgpu/src/backend/webgpu.rs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index 5fbbc4668a..fa2896dfc9 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -1997,7 +1997,7 @@ impl crate::context::Context for ContextWebGpu { create_identified(device_data.0.create_compute_pipeline(&mapped_desc)) } - fn device_create_pipeline_cache( + unsafe fn device_create_pipeline_cache( &self, _: &Self::DeviceId, _: &Self::DeviceData, @@ -2005,14 +2005,6 @@ impl crate::context::Context for ContextWebGpu { ) -> (Self::PipelineCacheId, Self::PipelineCacheData) { (Unused, ()) } - unsafe fn device_create_pipeline_cache_init( - &self, - _: &Self::DeviceId, - _: &Self::DeviceData, - _: &crate::PipelineCacheInitDescriptor<'_>, - ) -> (Self::PipelineCacheId, Self::PipelineCacheData) { - (Unused, ()) - } fn pipeline_cache_drop(&self, _: &Self::PipelineCacheId, _: &Self::PipelineCacheData) {} fn device_create_buffer( From bddbe631b13048c1d54bb9efc26db0397978fff0 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 9 May 2024 10:47:50 +0200 Subject: [PATCH 26/31] Fix changelog again --- CHANGELOG.md | 112 +++++++++++++++------------------------------------ 1 file changed, 33 insertions(+), 79 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea156a294c..c13ba81487 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,7 +46,6 @@ Bottom level categories: Wgpu now supports querying [shader compilation 
info](https://www.w3.org/TR/webgpu/#dom-gpushadermodule-getcompilationinfo). This allows you to get more structured information about compilation errors, warnings and info: - ```rust ... let lighting_shader = ctx.device.create_shader_module(include_wgsl!("lighting.wgsl")); @@ -63,6 +62,8 @@ for message in compilation_info By @stefnotch in [#5410](https://github.com/gfx-rs/wgpu/pull/5410) + + ### New features #### Vulkan @@ -77,7 +78,7 @@ By @stefnotch in [#5410](https://github.com/gfx-rs/wgpu/pull/5410) #### GLES / OpenGL -- Fix regression on OpenGL (EGL) where non-sRGB still used sRGB [#5642](https://github.com/gfx-rs/wgpu/pull/5642) +- Fix regression on OpenGL (EGL) where non-sRGB still used sRGB [#5642](https://github.com/gfx-rs/wgpu/pull/5642) ## v0.20.0 (2024-04-28) @@ -88,13 +89,10 @@ By @stefnotch in [#5410](https://github.com/gfx-rs/wgpu/pull/5410) Wgpu supports now [pipeline-overridable constants](https://www.w3.org/TR/webgpu/#dom-gpuprogrammablestage-constants) This allows you to define constants in wgsl like this: - ```rust override some_factor: f32 = 42.1337; // Specifies a default of 42.1337 if it's not set. ``` - And then set them at runtime like so on your pipeline consuming this shader: - ```rust // ... 
fragment: Some(wgpu::FragmentState { @@ -116,6 +114,7 @@ Due to a specification change `write_timestamp` is no longer supported on WebGPU By @wumpf in [#5188](https://github.com/gfx-rs/wgpu/pull/5188) + #### Wgsl const evaluation for many more built-ins Many numeric built-ins have had a constant evaluation implementation added for them, which allows them to be used in a `const` context: @@ -132,17 +131,17 @@ The following subgroup operations are available in wgsl now: `subgroupBallot`, `subgroupAll`, `subgroupAny`, `subgroupAdd`, `subgroupMul`, `subgroupMin`, `subgroupMax`, `subgroupAnd`, `subgroupOr`, `subgroupXor`, `subgroupExclusiveAdd`, `subgroupExclusiveMul`, `subgroupInclusiveAdd`, `subgroupInclusiveMul`, `subgroupBroadcastFirst`, `subgroupBroadcast`, `subgroupShuffle`, `subgroupShuffleDown`, `subgroupShuffleUp`, `subgroupShuffleXor` -Availability is governed by the following feature flags: -- `wgpu::Features::SUBGROUP` for all operations except `subgroupBarrier` in fragment & compute, supported on Vulkan, DX12 and Metal. -- `wgpu::Features::SUBGROUP_VERTEX`, for all operations except `subgroupBarrier` general operations in vertex shaders, supported on Vulkan -- `wgpu::Features::SUBGROUP_BARRIER`, for support of the `subgroupBarrier` operation, supported on Vulkan & Metal +Availability is governed by the following feature flags: +* `wgpu::Features::SUBGROUP` for all operations except `subgroupBarrier` in fragment & compute, supported on Vulkan, DX12 and Metal. +* `wgpu::Features::SUBGROUP_VERTEX`, for all operations except `subgroupBarrier` general operations in vertex shaders, supported on Vulkan +* `wgpu::Features::SUBGROUP_BARRIER`, for support of the `subgroupBarrier` operation, supported on Vulkan & Metal Note that there currently [some differences](https://github.com/gfx-rs/wgpu/issues/5555) between wgpu's native-only implementation and the [open WebGPU proposal](https://github.com/gpuweb/gpuweb/blob/main/proposals/subgroups.md). 
By @exrook and @lichtso in [#5301](https://github.com/gfx-rs/wgpu/pull/5301) -##### Signed and unsigned 64 bit integer support in shaders +##### Signed and unsigned 64 bit integer support in shaders. `wgpu::Features::SHADER_INT64` enables 64 bit integer signed and unsigned integer variables in wgsl (`i64` and `u64` respectively). Supported on Vulkan, DX12 (requires DXC) and Metal (with MSL 2.3+ support). @@ -161,12 +160,10 @@ By @atlv24 and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154 - This has been added to the set of flags set by `InstanceFlags::advanced_debugging`. Since the overhead is potentially very large, the flag is not enabled by default in debug builds when using `InstanceFlags::from_build_config`. - As with other instance flags, this flag can be changed in calls to `InstanceFlags::with_env` with the new `WGPU_GPU_BASED_VALIDATION` environment variable. - `wgpu::Instance` can now report which `wgpu::Backends` are available based on the build configuration. By @wumpf [#5167](https://github.com/gfx-rs/wgpu/pull/5167) - ```diff -wgpu::Instance::any_backend_feature_enabled() +!wgpu::Instance::enabled_backend_features().is_empty() ``` - - Breaking change: [`wgpu_core::pipeline::ProgrammableStageDescriptor`](https://docs.rs/wgpu-core/latest/wgpu_core/pipeline/struct.ProgrammableStageDescriptor.html#structfield.entry_point) is now optional. By @ErichDonGubler in [#5305](https://github.com/gfx-rs/wgpu/pull/5305). - `Features::downlevel{_webgl2,}_features` was made const by @MultisampledNight in [#5343](https://github.com/gfx-rs/wgpu/pull/5343) - Breaking change: [`wgpu_core::pipeline::ShaderError`](https://docs.rs/wgpu-core/latest/wgpu_core/pipeline/struct.ShaderError.html) has been moved to `naga`. 
By @stefnotch in [#5410](https://github.com/gfx-rs/wgpu/pull/5410) @@ -223,7 +220,6 @@ By @atlv24 and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154 ### Bug Fixes #### General - - Fix `serde` feature not compiling for `wgpu-types`. By @KirmesBude in [#5149](https://github.com/gfx-rs/wgpu/pull/5149) - Fix the validation of vertex and index ranges. By @nical in [#5144](https://github.com/gfx-rs/wgpu/pull/5144) and [#5156](https://github.com/gfx-rs/wgpu/pull/5156) - Fix panic when creating a surface while no backend is available. By @wumpf [#5166](https://github.com/gfx-rs/wgpu/pull/5166) @@ -277,7 +273,6 @@ By @atlv24 and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154 - Fix deadlock in certain situations when mapping buffers using `wgpu-profiler`. By @cwfitzgerald in [#5517](https://github.com/gfx-rs/wgpu/pull/5517) #### WebGPU - - Correctly pass through timestamp queries to WebGPU. By @cwfitzgerald in [#5527](https://github.com/gfx-rs/wgpu/pull/5527). ## v0.19.3 (2024-03-01) @@ -304,9 +299,9 @@ By @cwfitzgerald in [#5325](https://github.com/gfx-rs/wgpu/pull/5325). - Fix incorrect validation causing all indexed draws on render bundles to fail. By @wumpf in [#5430](https://github.com/gfx-rs/wgpu/pull/5340). #### Android - - Fix linking error when targeting android without `winit`. By @ashdnazg in [#5326](https://github.com/gfx-rs/wgpu/pull/5326). + ## v0.19.2 (2024-02-29) This release includes `wgpu`, `wgpu-core`, `wgpu-hal`, `wgpu-types`, and `naga`. All other crates are unchanged. @@ -314,19 +309,15 @@ This release includes `wgpu`, `wgpu-core`, `wgpu-hal`, `wgpu-types`, and `naga`. ### Added/New Features #### General - - `wgpu::Id` now implements `PartialOrd`/`Ord` allowing it to be put in `BTreeMap`s. By @cwfitzgerald and @9291Sam in [#5176](https://github.com/gfx-rs/wgpu/pull/5176) #### OpenGL - - Log an error when OpenGL texture format heuristics fail. 
By @PolyMeilex in [#5266](https://github.com/gfx-rs/wgpu/issues/5266) #### `wgsl-out` - - Learned to generate acceleration structure types. By @JMS55 in [#5261](https://github.com/gfx-rs/wgpu/pull/5261) ### Documentation - - Fix link in `wgpu::Instance::create_surface` documentation. By @HexoKnight in [#5280](https://github.com/gfx-rs/wgpu/pull/5280). - Fix typo in `wgpu::CommandEncoder::clear_buffer` documentation. By @PWhiddy in [#5281](https://github.com/gfx-rs/wgpu/pull/5281). - `Surface` configuration incorrectly claimed that `wgpu::Instance::create_surface` was unsafe. By @hackaugusto in [#5265](https://github.com/gfx-rs/wgpu/pull/5265). @@ -334,7 +325,6 @@ This release includes `wgpu`, `wgpu-core`, `wgpu-hal`, `wgpu-types`, and `naga`. ### Bug Fixes #### General - - Device lost callbacks are invoked when replaced and when global is dropped. By @bradwerth in [#5168](https://github.com/gfx-rs/wgpu/pull/5168) - Fix performance regression when allocating a large amount of resources of the same type. By @nical in [#5229](https://github.com/gfx-rs/wgpu/pull/5229) - Fix docs.rs wasm32 builds. By @cwfitzgerald in [#5310](https://github.com/gfx-rs/wgpu/pull/5310) @@ -343,11 +333,9 @@ This release includes `wgpu`, `wgpu-core`, `wgpu-hal`, `wgpu-types`, and `naga`. - Fix missing validation for `Device::clear_buffer` where `offset + size > buffer.size` was not checked when `size` was omitted. By @ErichDonGubler in [#5282](https://github.com/gfx-rs/wgpu/pull/5282). #### DX12 - - Fix `panic!` when dropping `Instance` without `InstanceFlags::VALIDATION`. By @hakolao in [#5134](https://github.com/gfx-rs/wgpu/pull/5134) #### OpenGL - - Fix internal format for the `Etc2Rgba8Unorm` format. By @andristarr in [#5178](https://github.com/gfx-rs/wgpu/pull/5178) - Try to load `libX11.so.6` in addition to `libX11.so` on linux. [#5307](https://github.com/gfx-rs/wgpu/pull/5307) - Make use of `GL_EXT_texture_shadow_lod` to support sampling a cube depth texture with an explicit LOD. 
By @cmrschwarz in #[5171](https://github.com/gfx-rs/wgpu/pull/5171). @@ -356,6 +344,7 @@ This release includes `wgpu`, `wgpu-core`, `wgpu-hal`, `wgpu-types`, and `naga`. - Fix code generation from nested loops. By @cwfitzgerald and @teoxoy in [#5311](https://github.com/gfx-rs/wgpu/pull/5311) + ## v0.19.1 (2024-01-22) This release includes `wgpu` and `wgpu-hal`. The rest of the crates are unchanged since 0.19.0. @@ -380,10 +369,10 @@ This release includes `wgpu` and `wgpu-hal`. The rest of the crates are unchange - Document Wayland specific behavior related to `SurfaceTexture::present`. By @i509VCB in [#5093](https://github.com/gfx-rs/wgpu/pull/5093). + ## v0.19.0 (2024-01-17) This release includes: - - `wgpu` - `wgpu-core` - `wgpu-hal` @@ -405,7 +394,6 @@ By @gents83 in [#3626](https://github.com/gfx-rs/wgpu/pull/3626) and thanks also All of wgpu's public dependencies are now re-exported at the top level so that users don't need to take their own dependencies. This includes: - - wgpu-core - wgpu-hal - naga @@ -452,7 +440,6 @@ By @i509VCB in [#4754](https://github.com/gfx-rs/wgpu/pull/4754). ### `DeviceExt::create_texture_with_data` allows Mip-Major Data Previously, `DeviceExt::create_texture_with_data` only allowed data to be provided in layer major order. There is now a `order` parameter which allows you to specify if the data is in layer major or mip major order. - ```diff let tex = ctx.device.create_texture_with_data( &queue, @@ -472,7 +459,6 @@ Passing an owned value `window` to `Surface` will return a `wgpu::Surface<'stati All possible safe variants (owned windows and web canvases) are grouped using `wgpu::SurfaceTarget`. Conversion to `wgpu::SurfaceTarget` is automatic for any type implementing `raw-window-handle`'s `HasWindowHandle` & `HasDisplayHandle` traits, i.e. most window types. 
For web canvas types this has to be done explicitly: - ```rust let surface: wgpu::Surface<'static> = instance.create_surface(wgpu::SurfaceTarget::Canvas(my_canvas))?; ``` @@ -482,15 +468,12 @@ All unsafe variants are now grouped under `wgpu::Instance::create_surface_unsafe In order to create a `wgpu::Surface<'static>` without passing ownership of the window use `wgpu::SurfaceTargetUnsafe::from_window`: - ```rust let surface = unsafe { instance.create_surface_unsafe(wgpu::SurfaceTargetUnsafe::from_window(&my_window))? }; ``` - The easiest way to make this code safe is to use shared ownership: - ```rust let window: Arc; // ... @@ -510,27 +493,21 @@ automatically converting literals and other constant expressions from abstract numeric types to concrete types when safe and necessary. For example, to build a vector of floating-point numbers, Naga previously made you write: - ```rust vec3(1.0, 2.0, 3.0) ``` - With this change, you can now simply write: - ```rust vec3(1, 2, 3) ``` - Even though the literals are abstract integers, Naga recognizes that it is safe and necessary to convert them to `f32` values in order to build the vector. You can also use abstract values as initializers for global constants and global and local variables, like this: - ```rust var unit_x: vec2 = vec2(1, 0); ``` - The literals `1` and `0` are abstract integers, and the expression `vec2(1, 0)` is an abstract vector. However, Naga recognizes that it can convert that to the concrete type `vec2` to satisfy @@ -573,7 +550,6 @@ By @cwfitzgerald in [#5053](https://github.com/gfx-rs/wgpu/pull/5053) ### New Features #### General - - Added `DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW` to know if `@builtin(vertex_index)` and `@builtin(instance_index)` will respect the `first_vertex` / `first_instance` in indirect calls. If this is not present, both will always start counting from 0. Currently enabled on all backends except DX12. 
By @cwfitzgerald in [#4722](https://github.com/gfx-rs/wgpu/pull/4722). - Added support for the `FLOAT32_FILTERABLE` feature (web and native, corresponds to WebGPU's `float32-filterable`). By @almarklein in [#4759](https://github.com/gfx-rs/wgpu/pull/4759). - GPU buffer memory is released during "lose the device". By @bradwerth in [#4851](https://github.com/gfx-rs/wgpu/pull/4851). @@ -588,7 +564,6 @@ By @cwfitzgerald in [#5053](https://github.com/gfx-rs/wgpu/pull/5053) - `SurfaceConfiguration` now exposes `desired_maximum_frame_latency` which was previously hard-coded to 2. By setting it to 1 you can reduce latency under the risk of making GPU & CPU work sequential. Currently, on DX12 this affects the `MaximumFrameLatency`, on all other backends except OpenGL the size of the swapchain (on OpenGL this has no effect). By @emilk & @wumpf in [#4899](https://github.com/gfx-rs/wgpu/pull/4899) #### OpenGL - - `@builtin(instance_index)` now properly reflects the range provided in the draw call instead of always counting from 0. By @cwfitzgerald in [#4722](https://github.com/gfx-rs/wgpu/pull/4722). - Desktop GL now supports `POLYGON_MODE_LINE` and `POLYGON_MODE_POINT`. By @valaphee in [#4836](https://github.com/gfx-rs/wgpu/pull/4836). @@ -660,7 +635,6 @@ This release includes `naga` version 0.14.2. The crates `wgpu-core`, `wgpu-hal` ### Bug Fixes #### Naga - - When evaluating const-expressions and generating SPIR-V, properly handle `Compose` expressions whose operands are `Splat` expressions. Such expressions are created and marked as constant by the constant evaluator. By @jimblandy in [#4695](https://github.com/gfx-rs/wgpu/pull/4695). ## v0.18.1 (2023-11-15) @@ -670,18 +644,15 @@ This release includes `naga` version 0.14.2. The crates `wgpu-core`, `wgpu-hal` ### Bug Fixes #### General - - Fix panic in `Surface::configure` in debug builds. 
By @cwfitzgerald in [#4635](https://github.com/gfx-rs/wgpu/pull/4635) - Fix crash when all the following are true: By @teoxoy in #[#4642](https://github.com/gfx-rs/wgpu/pull/4642) - Passing a naga module directly to `Device::create_shader_module`. - `InstanceFlags::DEBUG` is enabled. #### DX12 - - Always use HLSL 2018 when using DXC to compile HLSL shaders. By @daxpedda in [#4629](https://github.com/gfx-rs/wgpu/pull/4629) #### Metal - - In Metal Shading Language output, fix issue where local variables were sometimes using variable names from previous functions. By @DJMcNab in [#4594](https://github.com/gfx-rs/wgpu/pull/4594) ## v0.18.0 (2023-10-25) @@ -827,7 +798,7 @@ let instance = wgpu::Instance::new(InstanceDescriptor { `gles_minor_version`: By @PJB3005 in [#3998](https://github.com/gfx-rs/wgpu/pull/3998) `flags`: By @nical in [#4230](https://github.com/gfx-rs/wgpu/pull/4230) -### Many New Examples +### Many New Examples! - Added the following examples: By @JustAnotherCodemonkey in [#3885](https://github.com/gfx-rs/wgpu/pull/3885). - [repeated-compute](https://github.com/gfx-rs/wgpu/tree/trunk/examples/repeated-compute) @@ -877,6 +848,7 @@ By @teoxoy in [#4185](https://github.com/gfx-rs/wgpu/pull/4185) - Allow filtering labels out before they are passed to GPU drivers by @nical in [https://github.com/gfx-rs/wgpu/pull/4246](4246) - `DeviceLostClosure` callback mechanism provided so user agents can resolve `GPUDevice.lost` Promises at the appropriate time by @bradwerth in [#4645](https://github.com/gfx-rs/wgpu/pull/4645) + #### Vulkan - Rename `wgpu_hal::vulkan::Instance::required_extensions` to `desired_extensions`. By @jimblandy in [#4115](https://github.com/gfx-rs/wgpu/pull/4115) @@ -949,7 +921,7 @@ By @teoxoy in [#4185](https://github.com/gfx-rs/wgpu/pull/4185) ### Added/New Features -- Add `get_mapped_range_as_array_buffer` for faster buffer read-backs in wasm builds. By @ryankaplan in [#4042] (). 
+- Add `get_mapped_range_as_array_buffer` for faster buffer read-backs in wasm builds. By @ryankaplan in [#4042] (https://github.com/gfx-rs/wgpu/pull/4042). ### Bug Fixes @@ -1005,6 +977,7 @@ By @fornwall in [#3904](https://github.com/gfx-rs/wgpu/pull/3904) and [#3905](ht - Added support for importing external buffers using `buffer_from_raw` (Dx12, Metal, Vulkan) and `create_buffer_from_hal`. By @AdrianEddy in [#3355](https://github.com/gfx-rs/wgpu/pull/3355) + #### Vulkan - Work around [Vulkan-ValidationLayers#5671](https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/5671) by ignoring reports of violations of [VUID-vkCmdEndDebugUtilsLabelEXT-commandBuffer-01912](https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdEndDebugUtilsLabelEXT-commandBuffer-01912). By @jimblandy in [#3809](https://github.com/gfx-rs/wgpu/pull/3809). @@ -1015,7 +988,7 @@ By @fornwall in [#3904](https://github.com/gfx-rs/wgpu/pull/3904) and [#3905](ht - Empty scissor rects are allowed now, matching the specification. by @PJB3005 in [#3863](https://github.com/gfx-rs/wgpu/pull/3863). - Add back components info to `TextureFormat`s. By @teoxoy in [#3843](https://github.com/gfx-rs/wgpu/pull/3843). -- Add `get_mapped_range_as_array_buffer` for faster buffer read-backs in wasm builds. By @ryankaplan in [#4042] (). +- Add `get_mapped_range_as_array_buffer` for faster buffer read-backs in wasm builds. By @ryankaplan in [#4042] (https://github.com/gfx-rs/wgpu/pull/4042). ### Documentation @@ -1078,7 +1051,7 @@ By @fornwall in [#3904](https://github.com/gfx-rs/wgpu/pull/3904) and [#3905](ht #### DX12 -- Increase the `max_storage_buffers_per_shader_stage` and `max_storage_textures_per_shader_stage` limits based on what the hardware supports. by @Elabajaba in [#3798] +- Increase the `max_storage_buffers_per_shader_stage` and `max_storage_textures_per_shader_stage` limits based on what the hardware supports. 
by @Elabajaba in [#3798]https://github.com/gfx-rs/wgpu/pull/3798 ## v0.16.1 (2023-05-24) @@ -1093,7 +1066,7 @@ By @fornwall in [#3904](https://github.com/gfx-rs/wgpu/pull/3904) and [#3905](ht #### WebGPU -- Fix crash when calling `create_surface_from_canvas`. By @grovesNL in [#3718](https://github.com/gfx-rs/wgpu/pull/3718) +* Fix crash when calling `create_surface_from_canvas`. By @grovesNL in [#3718](https://github.com/gfx-rs/wgpu/pull/3718) ## v0.16.0 (2023-04-19) @@ -1112,6 +1085,7 @@ By @fornwall in [#3904](https://github.com/gfx-rs/wgpu/pull/3904) and [#3905](ht The `TextureFormat::describe` function was removed in favor of separate functions: `block_dimensions`, `is_compressed`, `is_srgb`, `required_features`, `guaranteed_format_features`, `sample_type` and `block_size`. + ```diff - let block_dimensions = format.describe().block_dimensions; + let block_dimensions = format.block_dimensions(); @@ -1204,7 +1178,6 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). ### Added/New Features #### General - - Added feature flags for ray-tracing (currently only hal): `RAY_QUERY` and `RAY_TRACING` @daniel-keitel (started by @expenses) in [#3507](https://github.com/gfx-rs/wgpu/pull/3507) #### Vulkan @@ -1215,6 +1188,7 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). - Added basic ray-tracing api for acceleration structures, and ray-queries @daniel-keitel (started by @expenses) in [#3507](https://github.com/gfx-rs/wgpu/pull/3507) + ### Changes #### General @@ -1241,14 +1215,12 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). - Add support for `Features::RG11B10UFLOAT_RENDERABLE`. By @mockersf in [#3689](https://github.com/gfx-rs/wgpu/pull/3689) #### Vulkan - - Set `max_memory_allocation_size` via `PhysicalDeviceMaintenance3Properties`. By @jinleili in [#3567](https://github.com/gfx-rs/wgpu/pull/3567) - Silence false-positive validation error about surface resizing. 
By @seabassjh in [#3627](https://github.com/gfx-rs/wgpu/pull/3627) ### Bug Fixes #### General - - `copyTextureToTexture` src/dst aspects must both refer to all aspects of src/dst format. By @teoxoy in [#3431](https://github.com/gfx-rs/wgpu/pull/3431) - Validate before extracting texture selectors. By @teoxoy in [#3487](https://github.com/gfx-rs/wgpu/pull/3487) - Fix fatal errors (those which panic even if an error handler is set) not including all of the details. By @kpreid in [#3563](https://github.com/gfx-rs/wgpu/pull/3563) @@ -1256,33 +1228,27 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). - Fix surfaces not being dropped until exit. By @benjaminschaaf in [#3647](https://github.com/gfx-rs/wgpu/pull/3647) #### WebGPU - - Fix handling of `None` values for `depth_ops` and `stencil_ops` in `RenderPassDescriptor::depth_stencil_attachment`. By @niklaskorz in [#3660](https://github.com/gfx-rs/wgpu/pull/3660) - Avoid using `WasmAbi` functions for WebGPU backend. By @grovesNL in [#3657](https://github.com/gfx-rs/wgpu/pull/3657) #### DX12 - - Use typeless formats for textures that might be viewed as srgb or non-srgb. By @teoxoy in [#3555](https://github.com/gfx-rs/wgpu/pull/3555) #### GLES - - Set FORCE_POINT_SIZE if it is vertex shader with mesh consist of point list. By @REASY in [3440](https://github.com/gfx-rs/wgpu/pull/3440) - Remove unwraps inside `surface.configure`. By @cwfitzgerald in [#3585](https://github.com/gfx-rs/wgpu/pull/3585) - Fix `copy_external_image_to_texture`, `copy_texture_to_texture` and `copy_buffer_to_texture` not taking the specified index into account if the target texture is a cube map, 2D texture array or cube map array. By @daxpedda [#3641](https://github.com/gfx-rs/wgpu/pull/3641) - Fix disabling of vertex attributes with non-consecutive locations. By @Azorlogh in [#3706](https://github.com/gfx-rs/wgpu/pull/3706) #### Metal - - Fix metal erroring on an `array_stride` of 0. 
By @teoxoy in [#3538](https://github.com/gfx-rs/wgpu/pull/3538) - `create_texture` returns an error if `new_texture` returns NULL. By @jinleili in [#3554](https://github.com/gfx-rs/wgpu/pull/3554) - Fix shader bounds checking being ignored. By @FL33TW00D in [#3603](https://github.com/gfx-rs/wgpu/pull/3603) #### Vulkan - - Treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android due to rotation issues. By @James2022-rgb in [#3525](https://github.com/gfx-rs/wgpu/pull/3525) ### Examples - - Use `BufferUsages::QUERY_RESOLVE` instead of `BufferUsages::COPY_DST` for buffers used in `CommandEncoder::resolve_query_set` calls in `mipmap` example. By @JolifantoBambla in [#3489](https://github.com/gfx-rs/wgpu/pull/3489) ## v0.15.3 (2023-03-22) @@ -1290,25 +1256,22 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). ### Bug Fixes #### Metal - - Fix incorrect mipmap being sampled when using `MinLod <= 0.0` and `MaxLod >= 32.0` or when the fragment shader samples different Lods in the same quad. By @cwfitzgerald in [#3610](https://github.com/gfx-rs/wgpu/pull/3610). #### GLES - - Fix `Vertex buffer is not big enough for the draw call.` for ANGLE/Web when rendering with instance attributes on a single instance. By @wumpf in [#3596](https://github.com/gfx-rs/wgpu/pull/3596) - Reset all queue state between command buffers in a submit. By @jleibs [#3589](https://github.com/gfx-rs/wgpu/pull/3589) - Reset the state of `SAMPLE_ALPHA_TO_COVERAGE` on queue reset. By @jleibs [#3589](https://github.com/gfx-rs/wgpu/pull/3589) + ## wgpu-0.15.2 (2023-03-08) ### Bug Fixes #### Metal - - Fix definition of `NSOperatingSystemVersion` to avoid potential crashes. 
By @grovesNL in [#3557](https://github.com/gfx-rs/wgpu/pull/3557) #### GLES - - Enable `WEBGL_debug_renderer_info` before querying unmasked vendor/renderer to avoid crashing on emscripten in [#3519](https://github.com/gfx-rs/wgpu/pull/3519) ## wgpu-0.15.1 (2023-02-09) @@ -1316,37 +1279,29 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). ### Changes #### General - - Fix for some minor issues in comments on some features. By @Wumpf in [#3455](https://github.com/gfx-rs/wgpu/pull/3455) #### Vulkan - - Improve format MSAA capabilities detection. By @jinleili in [#3429](https://github.com/gfx-rs/wgpu/pull/3429) #### DX12 - - Update gpu allocator to 0.22. By @Elabajaba in [#3447](https://github.com/gfx-rs/wgpu/pull/3447) #### WebGPU - - Implement `CommandEncoder::clear_buffer`. By @raphlinus in [#3426](https://github.com/gfx-rs/wgpu/pull/3426) ### Bug Fixes #### General - - Re-sort supported surface formats based on srgb-ness. By @cwfitzgerald in [#3444](https://github.com/gfx-rs/wgpu/pull/3444) #### Vulkan - - Fix surface view formats validation error. By @jinleili in [#3432](https://github.com/gfx-rs/wgpu/pull/3432) #### DX12 - - Fix DXC validation issues when using a custom `dxil_path`. By @Elabajaba in [#3434](https://github.com/gfx-rs/wgpu/pull/3434) #### GLES - - Unbind vertex buffers at end of renderpass. By @cwfitzgerald in [#3459](https://github.com/gfx-rs/wgpu/pull/3459) #### WebGPU @@ -1356,13 +1311,14 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671). ### Documentation #### General - - Build for Wasm on docs.rs. By @daxpedda in [#3462](https://github.com/gfx-rs/wgpu/pull/3428) + ## wgpu-0.15.0 (2023-01-25) ### Major Changes + #### WGSL Top-Level `let` is now `const` All top level constants are now declared with `const`, catching up with the wgsl spec. 
@@ -1374,7 +1330,7 @@ All top level constants are now declared with `const`, catching up with the wgsl +const SOME_CONSTANT = 12.0; ``` -See for smaller shader improvements. +See https://github.com/gfx-rs/naga/blob/master/CHANGELOG.md#v011-2023-01-25 for smaller shader improvements. #### Surface Capabilities API @@ -1460,7 +1416,7 @@ By @39ali in [3140](https://github.com/gfx-rs/wgpu/pull/3140) You can now choose to use the DXC compiler for DX12 instead of FXC. The DXC compiler is faster, less buggy, and allows for new features compared to the old, unmaintained FXC compiler. -You can choose which compiler to use at `Instance` creation using the `dx12_shader_compiler` field in the `InstanceDescriptor` struct. Note that DXC requires both `dxcompiler.dll` and `dxil.dll`, which can be downloaded from . Both .dlls need to be shipped with your application when targeting DX12 and using the `DXC` compiler. If the .dlls can't be loaded, then it will fall back to the FXC compiler. By @39ali and @Elabajaba in [#3356](https://github.com/gfx-rs/wgpu/pull/3356) +You can choose which compiler to use at `Instance` creation using the `dx12_shader_compiler` field in the `InstanceDescriptor` struct. Note that DXC requires both `dxcompiler.dll` and `dxil.dll`, which can be downloaded from https://github.com/microsoft/DirectXShaderCompiler/releases. Both .dlls need to be shipped with your application when targeting DX12 and using the `DXC` compiler. If the .dlls can't be loaded, then it will fall back to the FXC compiler. By @39ali and @Elabajaba in [#3356](https://github.com/gfx-rs/wgpu/pull/3356) #### Suballocate DX12 buffers and textures @@ -1542,6 +1498,7 @@ By @jimblandy in [#3254](https://github.com/gfx-rs/wgpu/pull/3254). 
- Implement `queue_validate_write_buffer` by @jinleili in [#3098](https://github.com/gfx-rs/wgpu/pull/3098) - Sync depth/stencil copy restrictions with the spec by @teoxoy in [#3314](https://github.com/gfx-rs/wgpu/pull/3314) + ### Added/New Features #### General @@ -1574,7 +1531,6 @@ By @jimblandy in [#3254](https://github.com/gfx-rs/wgpu/pull/3254). - Sync `TextureFormat.describe` with the spec. By @teoxoy in [3312](https://github.com/gfx-rs/wgpu/pull/3312) #### Metal - - Add a way to create `Device` and `Queue` from raw Metal resources in wgpu-hal. By @AdrianEddy in [#3338](https://github.com/gfx-rs/wgpu/pull/3338) ### Bug Fixes @@ -1584,12 +1540,12 @@ By @jimblandy in [#3254](https://github.com/gfx-rs/wgpu/pull/3254). - Update ndk-sys to v0.4.1+23.1.7779620, to fix checksum failures. By @jimblandy in [#3232](https://github.com/gfx-rs/wgpu/pull/3232). - Bother to free the `hal::Api::CommandBuffer` when a `wgpu_core::command::CommandEncoder` is dropped. By @jimblandy in [#3069](https://github.com/gfx-rs/wgpu/pull/3069). - Fixed the mipmap example by adding the missing WRITE_TIMESTAMP_INSIDE_PASSES feature. By @Olaroll in [#3081](https://github.com/gfx-rs/wgpu/pull/3081). -- Avoid panicking in some interactions with invalid resources by @nical in [#3094](https://github.com/gfx-rs/wgpu/pull/3094) +- Avoid panicking in some interactions with invalid resources by @nical in (#3094)[https://github.com/gfx-rs/wgpu/pull/3094] - Fixed an integer overflow in `copy_texture_to_texture` by @nical [#3090](https://github.com/gfx-rs/wgpu/pull/3090) -- Remove `wgpu_types::Features::DEPTH24PLUS_STENCIL8`, making `wgpu::TextureFormat::Depth24PlusStencil8` available on all backends. By @Healthire in [#3151](https://github.com/gfx-rs/wgpu/pull/3151) -- Fix an integer overflow in `queue_write_texture` by @nical in [#3146](https://github.com/gfx-rs/wgpu/pull/3146) -- Make `RenderPassCompatibilityError` and `CreateShaderModuleError` not so huge. 
By @jimblandy in [#3226](https://github.com/gfx-rs/wgpu/pull/3226) -- Check for invalid bitflag bits in wgpu-core and allow them to be captured/replayed by @nical in [#3229](https://github.com/gfx-rs/wgpu/pull/3229) +- Remove `wgpu_types::Features::DEPTH24PLUS_STENCIL8`, making `wgpu::TextureFormat::Depth24PlusStencil8` available on all backends. By @Healthire in (#3151)[https://github.com/gfx-rs/wgpu/pull/3151] +- Fix an integer overflow in `queue_write_texture` by @nical in (#3146)[https://github.com/gfx-rs/wgpu/pull/3146] +- Make `RenderPassCompatibilityError` and `CreateShaderModuleError` not so huge. By @jimblandy in (#3226)[https://github.com/gfx-rs/wgpu/pull/3226] +- Check for invalid bitflag bits in wgpu-core and allow them to be captured/replayed by @nical in (#3229)[https://github.com/gfx-rs/wgpu/pull/3229] - Evaluate `gfx_select!`'s `#[cfg]` conditions at the right time. By @jimblandy in [#3253](https://github.com/gfx-rs/wgpu/pull/3253) - Improve error messages when binding bind group with dynamic offsets. By @cwfitzgerald in [#3294](https://github.com/gfx-rs/wgpu/pull/3294) - Allow non-filtering sampling of integer textures. By @JMS55 in [#3362](https://github.com/gfx-rs/wgpu/pull/3362). @@ -1599,7 +1555,6 @@ By @jimblandy in [#3254](https://github.com/gfx-rs/wgpu/pull/3254). - Make `make_spirv_raw` and `make_spirv` handle big-endian binaries. By @1e1001 in [#3411](https://github.com/gfx-rs/wgpu/pull/3411). #### Vulkan - - Update ash to 0.37.1+1.3.235 to fix CI breaking by changing a call to the deprecated `debug_utils_set_object_name()` function to `set_debug_utils_object_name()` by @elabajaba in [#3273](https://github.com/gfx-rs/wgpu/pull/3273) - Document and improve extension detection. By @teoxoy in [#3327](https://github.com/gfx-rs/wgpu/pull/3327) - Don't use a pointer to a local copy of a `PhysicalDeviceDriverProperties` struct after it has gone out of scope. In fact, don't make a local copy at all. 
Introduce a helper function for building `CStr`s from C character arrays, and remove some `unsafe` blocks. By @jimblandy in [#3076](https://github.com/gfx-rs/wgpu/pull/3076). @@ -1610,7 +1565,6 @@ By @jimblandy in [#3254](https://github.com/gfx-rs/wgpu/pull/3254). - Don't re-use `GraphicsCommandList` when `close` or `reset` fails. By @xiaopengli89 in [#3204](https://github.com/gfx-rs/wgpu/pull/3204) #### Metal - - Fix texture view creation with full-resource views when using an explicit `mip_level_count` or `array_layer_count`. By @cwfitzgerald in [#3323](https://github.com/gfx-rs/wgpu/pull/3323) #### GLES @@ -1738,7 +1692,7 @@ both `raw_window_handle::HasRawWindowHandle` and `raw_window_handle::HasRawDispl #### Vulkan -- Fix `astc_hdr` formats support by @jinleili in [#2971]]() +- Fix `astc_hdr` formats support by @jinleili in [#2971]](https://github.com/gfx-rs/wgpu/pull/2971) - Update to Naga b209d911 (2022-9-1) to avoid generating SPIR-V that violates Vulkan valid usage rules `VUID-StandaloneSpirv-Flat-06202` and `VUID-StandaloneSpirv-Flat-04744`. By @jimblandy in From e957a8553a7735043ce01e730cf3895872749dc4 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 9 May 2024 10:48:33 +0200 Subject: [PATCH 27/31] Update wgpu-types/src/lib.rs Co-authored-by: Connor Fitzgerald --- wgpu-types/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 63af7c8aae..fa38bcab1c 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -918,6 +918,10 @@ bitflags::bitflags! 
{ /// /// Supported platforms: /// - Vulkan + /// + /// Unimplemented Platforms: + /// - DX12 + /// - Metal const PIPELINE_CACHE = 1 << 59; } } From 1f11a64c659fe396aaab636c21e103121b287ffb Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 9 May 2024 11:08:36 +0200 Subject: [PATCH 28/31] Address review comments --- tests/tests/pipeline_cache.rs | 44 ++++++++++++++--------------------- wgpu/src/context.rs | 10 ++++---- 2 files changed, 23 insertions(+), 31 deletions(-) diff --git a/tests/tests/pipeline_cache.rs b/tests/tests/pipeline_cache.rs index 13b3e5b52c..22742fea15 100644 --- a/tests/tests/pipeline_cache.rs +++ b/tests/tests/pipeline_cache.rs @@ -2,20 +2,17 @@ use std::{fmt::Write, num::NonZeroU64}; use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext}; -/// We want to test that partial updates to push constants work as expected. +/// We want to test that using a pipeline cache doesn't cause failure /// -/// As such, we dispatch two compute passes, one which writes the values -/// before a partial update, and one which writes the values after the partial update. -/// -/// If the update code is working correctly, the values not written to by the second update -/// will remain unchanged. +/// It would be nice if we could also assert that reusing a pipeline cache would make compilation +/// be faster however, some drivers use a fallback pipeline cache, which makes this inconsistent +/// (both intra- and inter-run). 
#[gpu_test] static PIPELINE_CACHE: GpuTestConfiguration = GpuTestConfiguration::new() .parameters( TestParameters::default() .test_features_limits() - .features(wgpu::Features::PIPELINE_CACHE) - .skip(wgpu_test::FailureCase::adapter("llvmpipe")), + .features(wgpu::Features::PIPELINE_CACHE), ) .run_async(pipeline_cache_test); @@ -24,6 +21,13 @@ const ARRAY_SIZE: u64 = 256; /// Create a shader which should be slow-ish to compile fn shader() -> String { + let mut body = String::new(); + for idx in 0..ARRAY_SIZE { + // "Safety": There will only be a single workgroup, and a single thread in that workgroup + writeln!(body, " output[{idx}] = {idx}u;") + .expect("`u64::fmt` and `String::write_fmt` are infallible"); + } + format!( r#" @group(0) @binding(0) @@ -31,14 +35,9 @@ fn shader() -> String { @compute @workgroup_size(1) fn main() {{ - {} + {body} }} "#, - (0..ARRAY_SIZE).fold(String::new(), |mut s, v| { - // "Safety": There will only be a single workgroup, and a single thread in that workgroup - writeln!(s, " output[{v}] = {v}u;").expect("String"); - s - }) ) } @@ -99,7 +98,6 @@ async fn pipeline_cache_test(ctx: TestingContext) { }); let first_cache_data; - let first_pipeline_duration; { let first_cache = unsafe { ctx.device @@ -109,7 +107,6 @@ async fn pipeline_cache_test(ctx: TestingContext) { fallback: false, }) }; - let start = std::time::Instant::now(); let first_pipeline = ctx .device .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { @@ -122,7 +119,6 @@ async fn pipeline_cache_test(ctx: TestingContext) { ..Default::default() }, }); - first_pipeline_duration = start.elapsed(); validate_pipeline(&ctx, first_pipeline, &bind_group, &gpu_buffer, &cpu_buffer).await; first_cache_data = first_cache.get_data(); } @@ -136,7 +132,6 @@ async fn pipeline_cache_test(ctx: TestingContext) { fallback: false, }) }; - let start = std::time::Instant::now(); let first_pipeline = ctx .device .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { @@ -149,14 +144,12 @@ 
async fn pipeline_cache_test(ctx: TestingContext) { ..Default::default() }, }); - let duration = start.elapsed(); validate_pipeline(&ctx, first_pipeline, &bind_group, &gpu_buffer, &cpu_buffer).await; - if false { - // Ideally, we could make this assertion. However, that doesn't actually work, because drivers have - // their own internal caches. This does work on my machine if I set `MESA_DISABLE_PIPELINE_CACHE=1` - // before running the test; but of course that is not a realistic scenario - assert!(duration.as_millis() < first_pipeline_duration.as_millis()); - } + + // Ideally, we could assert here that the second compilation was faster than the first + // However, that doesn't actually work, because drivers have their own internal caches. + // This does work on my machine if I set `MESA_DISABLE_PIPELINE_CACHE=1` + // before running the test; but of course that is not a realistic scenario } async fn validate_pipeline( @@ -180,7 +173,6 @@ async fn validate_pipeline( cpass.set_pipeline(&pipeline); cpass.set_bind_group(0, bind_group, &[]); - // -- Dispatch 0 -- cpass.dispatch_workgroups(1, 1, 1); } diff --git a/wgpu/src/context.rs b/wgpu/src/context.rs index a85038cfa3..c29d88c2b7 100644 --- a/wgpu/src/context.rs +++ b/wgpu/src/context.rs @@ -12,11 +12,11 @@ use crate::{ BufferDescriptor, CommandEncoderDescriptor, CompilationInfo, ComputePassDescriptor, ComputePipelineDescriptor, DeviceDescriptor, Error, ErrorFilter, ImageCopyBuffer, ImageCopyTexture, Maintain, MaintainResult, MapMode, PipelineCacheDescriptor, - PipelineCacheInitDescriptor, PipelineLayoutDescriptor, QuerySetDescriptor, - RenderBundleDescriptor, RenderBundleEncoderDescriptor, RenderPassDescriptor, - RenderPipelineDescriptor, RequestAdapterOptions, RequestDeviceError, SamplerDescriptor, - ShaderModuleDescriptor, ShaderModuleDescriptorSpirV, SurfaceTargetUnsafe, Texture, - TextureDescriptor, TextureViewDescriptor, UncapturedErrorHandler, + PipelineLayoutDescriptor, QuerySetDescriptor, 
RenderBundleDescriptor, + RenderBundleEncoderDescriptor, RenderPassDescriptor, RenderPipelineDescriptor, + RequestAdapterOptions, RequestDeviceError, SamplerDescriptor, ShaderModuleDescriptor, + ShaderModuleDescriptorSpirV, SurfaceTargetUnsafe, Texture, TextureDescriptor, + TextureViewDescriptor, UncapturedErrorHandler, }; /// Meta trait for an id tracked by a context. From f292531084dc89ae35831fee15e62c64f48abe2b Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 16 May 2024 12:49:48 +0100 Subject: [PATCH 29/31] Move the cache back to the root :( This really makes the diff so much more unwieldy --- deno_webgpu/pipeline.rs | 5 +- examples/src/boids/mod.rs | 2 + examples/src/bunnymark/mod.rs | 1 + examples/src/conservative_raster/mod.rs | 4 ++ examples/src/cube/mod.rs | 2 + examples/src/hello_compute/mod.rs | 1 + examples/src/hello_synchronization/mod.rs | 2 + examples/src/hello_triangle/mod.rs | 1 + examples/src/hello_workgroups/mod.rs | 1 + examples/src/mipmap/mod.rs | 2 + examples/src/msaa_line/mod.rs | 1 + examples/src/render_to_texture/mod.rs | 1 + examples/src/repeated_compute/mod.rs | 1 + examples/src/shadow/mod.rs | 2 + examples/src/skybox/mod.rs | 2 + examples/src/srgb_blend/mod.rs | 1 + examples/src/stencil_triangles/mod.rs | 2 + examples/src/storage_texture/mod.rs | 1 + examples/src/texture_arrays/mod.rs | 1 + examples/src/timestamp_queries/mod.rs | 2 + examples/src/uniform_values/mod.rs | 1 + examples/src/water/mod.rs | 3 ++ tests/src/image.rs | 1 + tests/tests/bgra8unorm_storage.rs | 1 + tests/tests/bind_group_layout_dedup.rs | 5 ++ tests/tests/buffer.rs | 2 + tests/tests/device.rs | 3 ++ tests/tests/mem_leaks.rs | 1 + tests/tests/nv12_texture/mod.rs | 1 + tests/tests/occlusion_query/mod.rs | 1 + tests/tests/partially_bounded_arrays/mod.rs | 1 + tests/tests/pipeline.rs | 1 + tests/tests/pipeline_cache.rs | 12 ++--- tests/tests/push_constants.rs | 1 + tests/tests/regression/issue_3349.rs | 1 + 
tests/tests/regression/issue_3457.rs | 2 + tests/tests/scissor_tests/mod.rs | 1 + tests/tests/shader/mod.rs | 1 + tests/tests/shader/zero_init_workgroup_mem.rs | 2 + tests/tests/shader_primitive_index/mod.rs | 1 + tests/tests/shader_view_format/mod.rs | 1 + tests/tests/subgroup_operations/mod.rs | 1 + tests/tests/vertex_indices/mod.rs | 1 + wgpu-core/src/device/resource.rs | 52 +++++++------------ wgpu-core/src/pipeline.rs | 6 ++- wgpu-hal/examples/halmark/main.rs | 3 +- wgpu-hal/examples/ray-traced-triangle/main.rs | 2 +- wgpu-hal/src/lib.rs | 7 +-- wgpu-hal/src/vulkan/device.rs | 13 ++--- wgpu/src/backend/wgpu_core.rs | 5 +- wgpu/src/lib.rs | 7 +-- 51 files changed, 106 insertions(+), 69 deletions(-) diff --git a/deno_webgpu/pipeline.rs b/deno_webgpu/pipeline.rs index 6ba3a3e516..b4d2f8d36e 100644 --- a/deno_webgpu/pipeline.rs +++ b/deno_webgpu/pipeline.rs @@ -114,8 +114,8 @@ pub fn op_webgpu_create_compute_pipeline( entry_point: compute.entry_point.map(Cow::from), constants: Cow::Owned(compute.constants), zero_initialize_workgroup_memory: true, - cache: None, }, + cache: None, }; let implicit_pipelines = match layout { GPUPipelineLayoutOrGPUAutoLayoutMode::Layout(_) => None, @@ -363,7 +363,6 @@ pub fn op_webgpu_create_render_pipeline( constants: Cow::Owned(fragment.constants), // Required to be true for WebGPU zero_initialize_workgroup_memory: true, - cache: None, }, targets: Cow::Owned(fragment.targets), }) @@ -389,7 +388,6 @@ pub fn op_webgpu_create_render_pipeline( constants: Cow::Owned(args.vertex.constants), // Required to be true for WebGPU zero_initialize_workgroup_memory: true, - cache: None, }, buffers: Cow::Owned(vertex_buffers), }, @@ -398,6 +396,7 @@ pub fn op_webgpu_create_render_pipeline( multisample: args.multisample, fragment, multiview: None, + cache: None, }; let implicit_pipelines = match args.layout { diff --git a/examples/src/boids/mod.rs b/examples/src/boids/mod.rs index 6c8bb6e76c..7b1b8f0bc3 100644 --- a/examples/src/boids/mod.rs +++ 
b/examples/src/boids/mod.rs @@ -156,6 +156,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); // create compute pipeline @@ -166,6 +167,7 @@ impl crate::framework::Example for Example { module: &compute_shader, entry_point: "main", compilation_options: Default::default(), + cache: None, }); // buffer for the three 2d triangle vertices of each instance diff --git a/examples/src/bunnymark/mod.rs b/examples/src/bunnymark/mod.rs index 679fc5014a..b5b33b54d5 100644 --- a/examples/src/bunnymark/mod.rs +++ b/examples/src/bunnymark/mod.rs @@ -224,6 +224,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let texture = { diff --git a/examples/src/conservative_raster/mod.rs b/examples/src/conservative_raster/mod.rs index 89500a798f..116ed8623b 100644 --- a/examples/src/conservative_raster/mod.rs +++ b/examples/src/conservative_raster/mod.rs @@ -113,6 +113,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let pipeline_triangle_regular = @@ -135,6 +136,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let pipeline_lines = if device @@ -165,6 +167,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }), ) } else { @@ -224,6 +227,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }), bind_group_layout, ) diff --git a/examples/src/cube/mod.rs b/examples/src/cube/mod.rs index 9347627812..9828157e57 100644 --- a/examples/src/cube/mod.rs +++ b/examples/src/cube/mod.rs @@ -260,6 
+260,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let pipeline_wire = if device @@ -301,6 +302,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); Some(pipeline_wire) } else { diff --git a/examples/src/hello_compute/mod.rs b/examples/src/hello_compute/mod.rs index d04aaa4309..cdd6d439de 100644 --- a/examples/src/hello_compute/mod.rs +++ b/examples/src/hello_compute/mod.rs @@ -110,6 +110,7 @@ async fn execute_gpu_inner( module: &cs_module, entry_point: "main", compilation_options: Default::default(), + cache: None, }); // Instantiates the bind group, once again specifying the binding of buffers. diff --git a/examples/src/hello_synchronization/mod.rs b/examples/src/hello_synchronization/mod.rs index 0a222fbe54..9b6675289c 100644 --- a/examples/src/hello_synchronization/mod.rs +++ b/examples/src/hello_synchronization/mod.rs @@ -104,6 +104,7 @@ async fn execute( module: &shaders_module, entry_point: "patient_main", compilation_options: Default::default(), + cache: None, }); let hasty_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { label: None, @@ -111,6 +112,7 @@ async fn execute( module: &shaders_module, entry_point: "hasty_main", compilation_options: Default::default(), + cache: None, }); //---------------------------------------------------------- diff --git a/examples/src/hello_triangle/mod.rs b/examples/src/hello_triangle/mod.rs index 79162a6956..e4d42674f7 100644 --- a/examples/src/hello_triangle/mod.rs +++ b/examples/src/hello_triangle/mod.rs @@ -72,6 +72,7 @@ async fn run(event_loop: EventLoop<()>, window: Window) { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let mut config = surface diff --git a/examples/src/hello_workgroups/mod.rs 
b/examples/src/hello_workgroups/mod.rs index 572de36d3e..0416451da1 100644 --- a/examples/src/hello_workgroups/mod.rs +++ b/examples/src/hello_workgroups/mod.rs @@ -111,6 +111,7 @@ async fn run() { module: &shader, entry_point: "main", compilation_options: Default::default(), + cache: None, }); //---------------------------------------------------------- diff --git a/examples/src/mipmap/mod.rs b/examples/src/mipmap/mod.rs index 0848e94e10..eaed9c82e7 100644 --- a/examples/src/mipmap/mod.rs +++ b/examples/src/mipmap/mod.rs @@ -109,6 +109,7 @@ impl Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let bind_group_layout = pipeline.get_bind_group_layout(0); @@ -310,6 +311,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); // Create bind group diff --git a/examples/src/msaa_line/mod.rs b/examples/src/msaa_line/mod.rs index cd22e75bc4..46bb743e99 100644 --- a/examples/src/msaa_line/mod.rs +++ b/examples/src/msaa_line/mod.rs @@ -78,6 +78,7 @@ impl Example { ..Default::default() }, multiview: None, + cache: None, }); let mut encoder = device.create_render_bundle_encoder(&wgpu::RenderBundleEncoderDescriptor { diff --git a/examples/src/render_to_texture/mod.rs b/examples/src/render_to_texture/mod.rs index 5e571dc74e..caed736741 100644 --- a/examples/src/render_to_texture/mod.rs +++ b/examples/src/render_to_texture/mod.rs @@ -72,6 +72,7 @@ async fn run(_path: Option) { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); log::info!("Wgpu context set up."); diff --git a/examples/src/repeated_compute/mod.rs b/examples/src/repeated_compute/mod.rs index 55e87eed9a..72b615251e 100644 --- a/examples/src/repeated_compute/mod.rs +++ b/examples/src/repeated_compute/mod.rs @@ -246,6 +246,7 @@ impl WgpuContext { module: &shader, entry_point: "main", compilation_options: 
Default::default(), + cache: None, }); WgpuContext { diff --git a/examples/src/shadow/mod.rs b/examples/src/shadow/mod.rs index 2cb6d6f3e2..b2c27f5892 100644 --- a/examples/src/shadow/mod.rs +++ b/examples/src/shadow/mod.rs @@ -526,6 +526,7 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); Pass { @@ -660,6 +661,7 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); Pass { diff --git a/examples/src/skybox/mod.rs b/examples/src/skybox/mod.rs index 35a4266d20..e526feedae 100644 --- a/examples/src/skybox/mod.rs +++ b/examples/src/skybox/mod.rs @@ -221,6 +221,7 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let entity_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { label: Some("Entity"), @@ -254,6 +255,7 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let sampler = device.create_sampler(&wgpu::SamplerDescriptor { diff --git a/examples/src/srgb_blend/mod.rs b/examples/src/srgb_blend/mod.rs index f701aff989..314fc92df2 100644 --- a/examples/src/srgb_blend/mod.rs +++ b/examples/src/srgb_blend/mod.rs @@ -151,6 +151,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); // Done diff --git a/examples/src/stencil_triangles/mod.rs b/examples/src/stencil_triangles/mod.rs index e0f495177f..8d638d20d1 100644 --- a/examples/src/stencil_triangles/mod.rs +++ b/examples/src/stencil_triangles/mod.rs @@ -106,6 +106,7 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let outer_pipeline = 
device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { @@ -141,6 +142,7 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let stencil_buffer = device.create_texture(&wgpu::TextureDescriptor { diff --git a/examples/src/storage_texture/mod.rs b/examples/src/storage_texture/mod.rs index 02900c8918..04253e8185 100644 --- a/examples/src/storage_texture/mod.rs +++ b/examples/src/storage_texture/mod.rs @@ -101,6 +101,7 @@ async fn run(_path: Option) { module: &shader, entry_point: "main", compilation_options: Default::default(), + cache: None, }); log::info!("Wgpu context set up."); diff --git a/examples/src/texture_arrays/mod.rs b/examples/src/texture_arrays/mod.rs index dd7b4ec89a..b0f474b957 100644 --- a/examples/src/texture_arrays/mod.rs +++ b/examples/src/texture_arrays/mod.rs @@ -341,6 +341,7 @@ impl crate::framework::Example for Example { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None }); Self { diff --git a/examples/src/timestamp_queries/mod.rs b/examples/src/timestamp_queries/mod.rs index 7a501637d4..703bafe490 100644 --- a/examples/src/timestamp_queries/mod.rs +++ b/examples/src/timestamp_queries/mod.rs @@ -299,6 +299,7 @@ fn compute_pass( module, entry_point: "main_cs", compilation_options: Default::default(), + cache: None, }); let bind_group_layout = compute_pipeline.get_bind_group_layout(0); let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { @@ -366,6 +367,7 @@ fn render_pass( depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let render_target = device.create_texture(&wgpu::TextureDescriptor { label: Some("rendertarget"), diff --git a/examples/src/uniform_values/mod.rs b/examples/src/uniform_values/mod.rs index 06780c8aef..c53a189722 100644 --- a/examples/src/uniform_values/mod.rs +++ b/examples/src/uniform_values/mod.rs @@ -192,6 +192,7 
@@ impl WgpuContext { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let surface_config = surface .get_default_config(&adapter, size.width, size.height) diff --git a/examples/src/water/mod.rs b/examples/src/water/mod.rs index 94f12895a8..b21ec70c4d 100644 --- a/examples/src/water/mod.rs +++ b/examples/src/water/mod.rs @@ -574,6 +574,8 @@ impl crate::framework::Example for Example { // No multisampling is used. multisample: wgpu::MultisampleState::default(), multiview: None, + // No pipeline caching is used + cache: None, }); // Same idea as the water pipeline. @@ -610,6 +612,7 @@ impl crate::framework::Example for Example { }), multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None }); // A render bundle to draw the terrain. diff --git a/tests/src/image.rs b/tests/src/image.rs index 8996f361cd..19bbc1a913 100644 --- a/tests/src/image.rs +++ b/tests/src/image.rs @@ -370,6 +370,7 @@ fn copy_via_compute( module: &sm, entry_point: "copy_texture_to_buffer", compilation_options: Default::default(), + cache: None, }); { diff --git a/tests/tests/bgra8unorm_storage.rs b/tests/tests/bgra8unorm_storage.rs index 17082a9ed4..7bc117f097 100644 --- a/tests/tests/bgra8unorm_storage.rs +++ b/tests/tests/bgra8unorm_storage.rs @@ -98,6 +98,7 @@ static BGRA8_UNORM_STORAGE: GpuTestConfiguration = GpuTestConfiguration::new() entry_point: "main", compilation_options: Default::default(), module: &module, + cache: None, }); let mut encoder = diff --git a/tests/tests/bind_group_layout_dedup.rs b/tests/tests/bind_group_layout_dedup.rs index 3466e1e244..3d74e62cba 100644 --- a/tests/tests/bind_group_layout_dedup.rs +++ b/tests/tests/bind_group_layout_dedup.rs @@ -91,6 +91,7 @@ async fn bgl_dedupe(ctx: TestingContext) { module: &module, entry_point: "no_resources", compilation_options: Default::default(), + cache: None, }; let pipeline = ctx.device.create_compute_pipeline(&desc); @@ -220,6 +221,7 @@ fn 
bgl_dedupe_with_dropped_user_handle(ctx: TestingContext) { module: &module, entry_point: "no_resources", compilation_options: Default::default(), + cache: None, }); let mut encoder = ctx.device.create_command_encoder(&Default::default()); @@ -266,6 +268,7 @@ fn bgl_dedupe_derived(ctx: TestingContext) { module: &module, entry_point: "resources", compilation_options: Default::default(), + cache: None, }); // We create two bind groups, pulling the bind_group_layout from the pipeline each time. @@ -337,6 +340,7 @@ fn separate_programs_have_incompatible_derived_bgls(ctx: TestingContext) { module: &module, entry_point: "resources", compilation_options: Default::default(), + cache: None, }; // Create two pipelines, creating a BG from the second. let pipeline1 = ctx.device.create_compute_pipeline(&desc); @@ -399,6 +403,7 @@ fn derived_bgls_incompatible_with_regular_bgls(ctx: TestingContext) { module: &module, entry_point: "resources", compilation_options: Default::default(), + cache: None, }); // Create a matching BGL diff --git a/tests/tests/buffer.rs b/tests/tests/buffer.rs index 0693877d00..2410267315 100644 --- a/tests/tests/buffer.rs +++ b/tests/tests/buffer.rs @@ -225,6 +225,7 @@ static MINIMUM_BUFFER_BINDING_SIZE_LAYOUT: GpuTestConfiguration = GpuTestConfigu module: &shader_module, entry_point: "main", compilation_options: Default::default(), + cache: None, }); }); }); @@ -294,6 +295,7 @@ static MINIMUM_BUFFER_BINDING_SIZE_DISPATCH: GpuTestConfiguration = GpuTestConfi module: &shader_module, entry_point: "main", compilation_options: Default::default(), + cache: None, }); let buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { diff --git a/tests/tests/device.rs b/tests/tests/device.rs index 649a850fa9..3e78293296 100644 --- a/tests/tests/device.rs +++ b/tests/tests/device.rs @@ -488,6 +488,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne multisample: wgpu::MultisampleState::default(), fragment: None, multiview: None, + 
cache: None, }); }); @@ -500,6 +501,7 @@ static DEVICE_DESTROY_THEN_MORE: GpuTestConfiguration = GpuTestConfiguration::ne module: &shader_module, entry_point: "", compilation_options: Default::default(), + cache: None, }); }); @@ -757,6 +759,7 @@ fn vs_main() -> @builtin(position) vec4 { depth_stencil: None, multisample: wgt::MultisampleState::default(), multiview: None, + cache: None }); // fail(&ctx.device, || { diff --git a/tests/tests/mem_leaks.rs b/tests/tests/mem_leaks.rs index 7002ebabe0..3c59aec036 100644 --- a/tests/tests/mem_leaks.rs +++ b/tests/tests/mem_leaks.rs @@ -113,6 +113,7 @@ async fn draw_test_with_reports( })], }), multiview: None, + cache: None, }); let global_report = ctx.instance.generate_report().unwrap(); diff --git a/tests/tests/nv12_texture/mod.rs b/tests/tests/nv12_texture/mod.rs index 70ee849831..fa386f8653 100644 --- a/tests/tests/nv12_texture/mod.rs +++ b/tests/tests/nv12_texture/mod.rs @@ -41,6 +41,7 @@ static NV12_TEXTURE_CREATION_SAMPLING: GpuTestConfiguration = GpuTestConfigurati depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let tex = ctx.device.create_texture(&wgpu::TextureDescriptor { diff --git a/tests/tests/occlusion_query/mod.rs b/tests/tests/occlusion_query/mod.rs index 1a68ecf79d..a888320e28 100644 --- a/tests/tests/occlusion_query/mod.rs +++ b/tests/tests/occlusion_query/mod.rs @@ -51,6 +51,7 @@ static OCCLUSION_QUERY: GpuTestConfiguration = GpuTestConfiguration::new() }), multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); // Create occlusion query set diff --git a/tests/tests/partially_bounded_arrays/mod.rs b/tests/tests/partially_bounded_arrays/mod.rs index 11eee5b207..83f9cee382 100644 --- a/tests/tests/partially_bounded_arrays/mod.rs +++ b/tests/tests/partially_bounded_arrays/mod.rs @@ -70,6 +70,7 @@ static PARTIALLY_BOUNDED_ARRAY: GpuTestConfiguration = GpuTestConfiguration::new module: &cs_module, entry_point: "main", 
compilation_options: Default::default(), + cache: None, }); let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { diff --git a/tests/tests/pipeline.rs b/tests/tests/pipeline.rs index a07e158a53..0d725b8f40 100644 --- a/tests/tests/pipeline.rs +++ b/tests/tests/pipeline.rs @@ -29,6 +29,7 @@ static PIPELINE_DEFAULT_LAYOUT_BAD_MODULE: GpuTestConfiguration = GpuTestConfigu module: &module, entry_point: "doesn't exist", compilation_options: Default::default(), + cache: None, }); pipeline.get_bind_group_layout(0); diff --git a/tests/tests/pipeline_cache.rs b/tests/tests/pipeline_cache.rs index 22742fea15..58dae4694f 100644 --- a/tests/tests/pipeline_cache.rs +++ b/tests/tests/pipeline_cache.rs @@ -114,10 +114,8 @@ async fn pipeline_cache_test(ctx: TestingContext) { layout: Some(&pipeline_layout), module: &sm, entry_point: "main", - compilation_options: wgpu::PipelineCompilationOptions { - cache: Some(&first_cache), - ..Default::default() - }, + compilation_options: Default::default(), + cache: Some(&first_cache), }); validate_pipeline(&ctx, first_pipeline, &bind_group, &gpu_buffer, &cpu_buffer).await; first_cache_data = first_cache.get_data(); @@ -139,10 +137,8 @@ async fn pipeline_cache_test(ctx: TestingContext) { layout: Some(&pipeline_layout), module: &sm, entry_point: "main", - compilation_options: wgpu::PipelineCompilationOptions { - cache: Some(&second_cache), - ..Default::default() - }, + compilation_options: Default::default(), + cache: Some(&second_cache), }); validate_pipeline(&ctx, first_pipeline, &bind_group, &gpu_buffer, &cpu_buffer).await; diff --git a/tests/tests/push_constants.rs b/tests/tests/push_constants.rs index 04d9a00f7d..a18207bef6 100644 --- a/tests/tests/push_constants.rs +++ b/tests/tests/push_constants.rs @@ -104,6 +104,7 @@ async fn partial_update_test(ctx: TestingContext) { module: &sm, entry_point: "main", compilation_options: Default::default(), + cache: None, }); let mut encoder = ctx diff --git 
a/tests/tests/regression/issue_3349.rs b/tests/tests/regression/issue_3349.rs index 74c466b45a..35d35e5bdf 100644 --- a/tests/tests/regression/issue_3349.rs +++ b/tests/tests/regression/issue_3349.rs @@ -119,6 +119,7 @@ async fn multi_stage_data_binding_test(ctx: TestingContext) { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let texture = ctx.device.create_texture(&wgpu::TextureDescriptor { diff --git a/tests/tests/regression/issue_3457.rs b/tests/tests/regression/issue_3457.rs index f18d681ae1..f0f7e64636 100644 --- a/tests/tests/regression/issue_3457.rs +++ b/tests/tests/regression/issue_3457.rs @@ -80,6 +80,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = })], }), multiview: None, + cache: None, }); let single_pipeline = ctx @@ -111,6 +112,7 @@ static PASS_RESET_VERTEX_BUFFER: GpuTestConfiguration = })], }), multiview: None, + cache: None, }); let view = ctx diff --git a/tests/tests/scissor_tests/mod.rs b/tests/tests/scissor_tests/mod.rs index 15c35644e5..3f1e7df135 100644 --- a/tests/tests/scissor_tests/mod.rs +++ b/tests/tests/scissor_tests/mod.rs @@ -61,6 +61,7 @@ async fn scissor_test_impl( })], }), multiview: None, + cache: None, }); let readback_buffer = image::ReadbackBuffers::new(&ctx.device, &texture); diff --git a/tests/tests/shader/mod.rs b/tests/tests/shader/mod.rs index 6ece08652f..5ae201f1d2 100644 --- a/tests/tests/shader/mod.rs +++ b/tests/tests/shader/mod.rs @@ -309,6 +309,7 @@ async fn shader_input_output_test( module: &sm, entry_point: "cs_main", compilation_options: Default::default(), + cache: None, }); // -- Initializing data -- diff --git a/tests/tests/shader/zero_init_workgroup_mem.rs b/tests/tests/shader/zero_init_workgroup_mem.rs index cb9f341ee5..0dcb81959b 100644 --- a/tests/tests/shader/zero_init_workgroup_mem.rs +++ b/tests/tests/shader/zero_init_workgroup_mem.rs @@ -88,6 +88,7 @@ static ZERO_INIT_WORKGROUP_MEMORY: GpuTestConfiguration = GpuTestConfiguration:: 
module: &sm, entry_point: "read", compilation_options: Default::default(), + cache: None, }); let pipeline_write = ctx @@ -98,6 +99,7 @@ static ZERO_INIT_WORKGROUP_MEMORY: GpuTestConfiguration = GpuTestConfiguration:: module: &sm, entry_point: "write", compilation_options: Default::default(), + cache: None, }); // -- Initializing data -- diff --git a/tests/tests/shader_primitive_index/mod.rs b/tests/tests/shader_primitive_index/mod.rs index fb43397830..9972f81aa1 100644 --- a/tests/tests/shader_primitive_index/mod.rs +++ b/tests/tests/shader_primitive_index/mod.rs @@ -147,6 +147,7 @@ async fn pulling_common( })], }), multiview: None, + cache: None, }); let width = 2; diff --git a/tests/tests/shader_view_format/mod.rs b/tests/tests/shader_view_format/mod.rs index 53c642bf7a..d34b8d851d 100644 --- a/tests/tests/shader_view_format/mod.rs +++ b/tests/tests/shader_view_format/mod.rs @@ -109,6 +109,7 @@ async fn reinterpret( depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { layout: &pipeline.get_bind_group_layout(0), diff --git a/tests/tests/subgroup_operations/mod.rs b/tests/tests/subgroup_operations/mod.rs index 2c518a9d93..7d0aec8241 100644 --- a/tests/tests/subgroup_operations/mod.rs +++ b/tests/tests/subgroup_operations/mod.rs @@ -75,6 +75,7 @@ static SUBGROUP_OPERATIONS: GpuTestConfiguration = GpuTestConfiguration::new() module: &cs_module, entry_point: "main", compilation_options: Default::default(), + cache: None, }); let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { diff --git a/tests/tests/vertex_indices/mod.rs b/tests/tests/vertex_indices/mod.rs index cad7e731d1..7bd172d850 100644 --- a/tests/tests/vertex_indices/mod.rs +++ b/tests/tests/vertex_indices/mod.rs @@ -295,6 +295,7 @@ async fn vertex_index_common(ctx: TestingContext) { })], }), multiview: None, + cache: None, }; let builtin_pipeline = 
ctx.device.create_render_pipeline(&pipeline_desc); pipeline_desc.vertex.entry_point = "vs_main_buffers"; diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 08b495d3fd..7ab1fa8c94 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -2823,7 +2823,7 @@ impl Device { Device::make_late_sized_buffer_groups(&shader_binding_sizes, &pipeline_layout); let cache = 'cache: { - let Some(cache) = desc.stage.cache else { + let Some(cache) = desc.cache else { break 'cache None; }; let Ok(cache) = hub.pipeline_caches.get(cache) else { @@ -2844,8 +2844,8 @@ impl Device { entry_point: final_entry_point_name.as_ref(), constants: desc.stage.constants.as_ref(), zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, - cache: cache.as_ref().and_then(|it| it.raw.as_ref()), }, + cache: cache.as_ref().and_then(|it| it.raw.as_ref()), }; let raw = unsafe { @@ -3219,7 +3219,7 @@ impl Device { let vertex_shader_module; let vertex_entry_point_name; - let vertex_cache; + let vertex_stage = { let stage_desc = &desc.vertex.stage; let stage = wgt::ShaderStages::VERTEX; @@ -3257,32 +3257,16 @@ impl Device { validated_stages |= stage; } - vertex_cache = 'cache: { - let Some(cache) = stage_desc.cache else { - break 'cache None; - }; - let Ok(cache) = hub.pipeline_caches.get(cache) else { - break 'cache None; - }; - - if cache.device.as_info().id() != self.as_info().id() { - return Err(DeviceError::WrongDevice.into()); - } - Some(cache) - }; - hal::ProgrammableStage { module: vertex_shader_module.raw(), entry_point: &vertex_entry_point_name, constants: stage_desc.constants.as_ref(), zero_initialize_workgroup_memory: stage_desc.zero_initialize_workgroup_memory, - cache: vertex_cache.as_ref().and_then(|it| it.raw.as_ref()), } }; let mut fragment_shader_module = None; let fragment_entry_point_name; - let fragment_cache; let fragment_stage = match desc.fragment { Some(ref fragment_state) => { let stage = 
wgt::ShaderStages::FRAGMENT; @@ -3334,20 +3318,6 @@ impl Device { })?; } - fragment_cache = 'cache: { - let Some(cache) = fragment_state.stage.cache else { - break 'cache None; - }; - let Ok(cache) = hub.pipeline_caches.get(cache) else { - break 'cache None; - }; - - if cache.device.as_info().id() != self.as_info().id() { - return Err(DeviceError::WrongDevice.into()); - } - Some(cache) - }; - Some(hal::ProgrammableStage { module: shader_module.raw(), entry_point: &fragment_entry_point_name, @@ -3355,7 +3325,6 @@ impl Device { zero_initialize_workgroup_memory: fragment_state .stage .zero_initialize_workgroup_memory, - cache: fragment_cache.as_ref().and_then(|it| it.raw.as_ref()), }) } None => None, @@ -3445,6 +3414,20 @@ impl Device { let late_sized_buffer_groups = Device::make_late_sized_buffer_groups(&shader_binding_sizes, &pipeline_layout); + let pipeline_cache = 'cache: { + let Some(cache) = desc.cache else { + break 'cache None; + }; + let Ok(cache) = hub.pipeline_caches.get(cache) else { + break 'cache None; + }; + + if cache.device.as_info().id() != self.as_info().id() { + return Err(DeviceError::WrongDevice.into()); + } + Some(cache) + }; + let pipeline_desc = hal::RenderPipelineDescriptor { label: desc.label.to_hal(self.instance_flags), layout: pipeline_layout.raw(), @@ -3456,6 +3439,7 @@ impl Device { fragment_stage, color_targets, multiview: desc.multiview, + cache: pipeline_cache.as_ref().and_then(|it| it.raw.as_ref()), }; let raw = unsafe { self.raw diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index 4b7f402662..d02915fa64 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -166,8 +166,6 @@ pub struct ProgrammableStageDescriptor<'a> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, - /// The pipeline cache to use when creating this pipeline. 
- pub cache: Option, } /// Number of implicit bind groups derived at pipeline creation. @@ -195,6 +193,8 @@ pub struct ComputePipelineDescriptor<'a> { pub layout: Option, /// The compiled compute stage and its entry point. pub stage: ProgrammableStageDescriptor<'a>, + /// The pipeline cache to use when creating this pipeline. + pub cache: Option, } #[derive(Clone, Debug, Error)] @@ -380,6 +380,8 @@ pub struct RenderPipelineDescriptor<'a> { /// If the pipeline will be used with a multiview render pass, this indicates how many array /// layers the attachments will have. pub multiview: Option, + /// The pipeline cache to use when creating this pipeline. + pub cache: Option, } #[derive(Clone, Debug)] diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index f91c1d5778..ee59fa2590 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -254,7 +254,6 @@ impl Example { entry_point: "vs_main", constants: &constants, zero_initialize_workgroup_memory: true, - cache: None, }, vertex_buffers: &[], fragment_stage: Some(hal::ProgrammableStage { @@ -262,7 +261,6 @@ impl Example { entry_point: "fs_main", constants: &constants, zero_initialize_workgroup_memory: true, - cache: None, }), primitive: wgt::PrimitiveState { topology: wgt::PrimitiveTopology::TriangleStrip, @@ -276,6 +274,7 @@ impl Example { write_mask: wgt::ColorWrites::default(), })], multiview: None, + cache: None, }; let pipeline = unsafe { device.create_render_pipeline(&pipeline_desc).unwrap() }; diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index f6806bb132..8f404dc4d2 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -373,8 +373,8 @@ impl Example { entry_point: "main", constants: &Default::default(), zero_initialize_workgroup_memory: true, - cache: None, }, + cache: None, }) } .unwrap(); diff --git a/wgpu-hal/src/lib.rs 
b/wgpu-hal/src/lib.rs index cc68efdbb2..16cc5fe218 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1637,8 +1637,6 @@ pub struct ProgrammableStage<'a, A: Api> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, - /// The cache which will be used and filled when compiling this pipeline - pub cache: Option<&'a A::PipelineCache>, } // Rust gets confused about the impl requirements for `A` @@ -1649,7 +1647,6 @@ impl Clone for ProgrammableStage<'_, A> { entry_point: self.entry_point, constants: self.constants, zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, - cache: self.cache, } } } @@ -1662,6 +1659,8 @@ pub struct ComputePipelineDescriptor<'a, A: Api> { pub layout: &'a A::PipelineLayout, /// The compiled compute stage and its entry point. pub stage: ProgrammableStage<'a, A>, + /// The cache which will be used and filled when compiling this pipeline + pub cache: Option<&'a A::PipelineCache>, } pub struct PipelineCacheDescriptor<'a> { @@ -1703,6 +1702,8 @@ pub struct RenderPipelineDescriptor<'a, A: Api> { /// If the pipeline will be used with a multiview render pass, this indicates how many array /// layers the attachments will have. pub multiview: Option, + /// The cache which will be used and filled when compiling this pipeline + pub cache: Option<&'a A::PipelineCache>, } #[derive(Debug, Clone)] diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index ec91167822..f948a1d41c 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1863,15 +1863,9 @@ impl crate::Device for super::Device { .build() }]; - let vertex_cache = desc.vertex_stage.cache.map(|it| it.raw); - let fragment_cache = desc - .fragment_stage - .as_ref() - .and_then(|it| it.cache) - .map(|it| it.raw); - // TODO: What should the behaviour be when both are set and different? 
- let pipeline_cache = vertex_cache - .or(fragment_cache) + let pipeline_cache = desc + .cache + .map(|it| it.raw) .unwrap_or(vk::PipelineCache::null()); let mut raw_vec = { @@ -1927,7 +1921,6 @@ impl crate::Device for super::Device { }]; let pipeline_cache = desc - .stage .cache .map(|it| it.raw) .unwrap_or(vk::PipelineCache::null()); diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index ee113d3fc8..4362e6dca0 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -1167,7 +1167,6 @@ impl crate::Context for ContextWgpuCore { .vertex .compilation_options .zero_initialize_workgroup_memory, - cache: desc.vertex.compilation_options.cache.map(|c| c.id.into()), }, buffers: Borrowed(&vertex_buffers), }, @@ -1182,11 +1181,11 @@ impl crate::Context for ContextWgpuCore { zero_initialize_workgroup_memory: frag .compilation_options .zero_initialize_workgroup_memory, - cache: frag.compilation_options.cache.map(|c| c.id.into()), }, targets: Borrowed(frag.targets), }), multiview: desc.multiview, + cache: desc.cache.map(|c| c.id.into()), }; let (id, error) = wgc::gfx_select!(device => self.0.device_create_render_pipeline( @@ -1235,8 +1234,8 @@ impl crate::Context for ContextWgpuCore { zero_initialize_workgroup_memory: desc .compilation_options .zero_initialize_workgroup_memory, - cache: desc.compilation_options.cache.map(|c| c.id.into()), }, + cache: desc.cache.map(|c| c.id.into()), }; let (id, error) = wgc::gfx_select!(device => self.0.device_create_compute_pipeline( diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index a1e350b3d9..141cb01034 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -1932,6 +1932,8 @@ pub struct RenderPipelineDescriptor<'a> { /// If the pipeline will be used with a multiview render pass, this indicates how many array /// layers the attachments will have. pub multiview: Option, + /// The pipeline cache to use when creating this pipeline. 
+ pub cache: Option<&'a PipelineCache>, } #[cfg(send_sync)] static_assertions::assert_impl_all!(RenderPipelineDescriptor<'_>: Send, Sync); @@ -1991,8 +1993,6 @@ pub struct PipelineCompilationOptions<'a> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, - /// The pipeline cache to use when creating this pipeline. - pub cache: Option<&'a PipelineCache>, } impl<'a> Default for PipelineCompilationOptions<'a> { @@ -2006,7 +2006,6 @@ impl<'a> Default for PipelineCompilationOptions<'a> { Self { constants, zero_initialize_workgroup_memory: true, - cache: None, } } } @@ -2032,6 +2031,8 @@ pub struct ComputePipelineDescriptor<'a> { /// /// This implements `Default`, and for most users can be set to `Default::default()` pub compilation_options: PipelineCompilationOptions<'a>, + /// The pipeline cache to use when creating this pipeline. + pub cache: Option<&'a PipelineCache>, } #[cfg(send_sync)] static_assertions::assert_impl_all!(ComputePipelineDescriptor<'_>: Send, Sync); From 96da24a7cb93c7aa8b90ddf02fddc2e23ebcfdb0 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 16 May 2024 14:03:15 +0100 Subject: [PATCH 30/31] Fix ash upgrade issue --- wgpu-hal/src/vulkan/device.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 53480ccea9..1ea627897f 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1954,7 +1954,7 @@ impl crate::Device for super::Device { &self, desc: &crate::PipelineCacheDescriptor<'_>, ) -> Result { - let mut info = vk::PipelineCacheCreateInfo::builder(); + let mut info = vk::PipelineCacheCreateInfo::default(); if let Some(data) = desc.data { info = info.initial_data(data) } From 56bb2ead816d4a85fc9753073123f87965bb86c1 Mon Sep 17 00:00:00 2001 From: Daniel McNab 
<36049421+DJMcNab@users.noreply.github.com> Date: Thu, 16 May 2024 14:12:40 +0100 Subject: [PATCH 31/31] Fix new test --- tests/tests/compute_pass_resource_ownership.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/tests/compute_pass_resource_ownership.rs b/tests/tests/compute_pass_resource_ownership.rs index 6612ad0068..4d48c2ad9e 100644 --- a/tests/tests/compute_pass_resource_ownership.rs +++ b/tests/tests/compute_pass_resource_ownership.rs @@ -161,6 +161,7 @@ fn resource_setup(ctx: &TestingContext) -> ResourceSetup { module: &sm, entry_point: "main", compilation_options: Default::default(), + cache: None, }); ResourceSetup {